@@ -3,15 +3,12 @@ extern crate alloc;
33use alloc:: string:: String ;
44use alloc:: vec:: Vec ;
55
6- /// A simple token type.
/// A lexical token produced by `tokenize`.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so tokens can be printed in
/// diagnostics, duplicated, and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// An opening parenthesis, `(`.
    LParen,
    /// A closing parenthesis, `)`.
    RParen,
    /// Any other lexeme, stored as its own owned text.
    Atom(String),
}
1211
13- /// Given an input string slice, break it into tokens.
14- /// String–literals (delimited by quotes) are kept as a single atom.
1512fn tokenize ( input : & str ) -> Vec < Token > {
1613 let mut tokens = Vec :: new ( ) ;
1714 let mut chars = input. chars ( ) . peekable ( ) ;
@@ -24,7 +21,6 @@ fn tokenize(input: &str) -> Vec<Token> {
2421 } else if c == ')' {
2522 tokens. push ( Token :: RParen ) ;
2623 } else if c == '"' {
27- // Read a string literal including the quotes.
2824 let mut s = String :: new ( ) ;
2925 s. push ( '"' ) ;
3026 while let Some ( & next) = chars. peek ( ) {
@@ -36,7 +32,6 @@ fn tokenize(input: &str) -> Vec<Token> {
3632 }
3733 tokens. push ( Token :: Atom ( s) ) ;
3834 } else {
39- // Read an atom until whitespace or a parenthesis is encountered.
4035 let mut s = String :: new ( ) ;
4136 s. push ( c) ;
4237 while let Some ( & next) = chars. peek ( ) {
@@ -53,14 +48,11 @@ fn tokenize(input: &str) -> Vec<Token> {
5348 tokens
5449}
5550
56- /// An AST node representing either an atom or a list of nodes.
/// A node of the parsed tree: either a single atom or a list of child nodes.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so trees can be printed in
/// diagnostics, duplicated, and compared structurally in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Node {
    /// A leaf holding the atom's text.
    Atom(String),
    /// A parenthesised group holding its children in source order.
    List(Vec<Node>),
}
6155
62- /// A recursive parser that builds a (possibly malformed) AST.
63- /// It is tolerant – if extra closing parentheses occur it simply produces an Atom.
6456fn parse_node ( tokens : & [ Token ] , mut i : usize ) -> ( Node , usize ) {
6557 if i >= tokens. len ( ) {
6658 return ( Node :: Atom ( String :: new ( ) ) , i) ;
@@ -84,15 +76,11 @@ fn parse_node(tokens: &[Token], mut i: usize) -> (Node, usize) {
8476 }
8577 ( Node :: List ( children) , i)
8678 }
87- Token :: RParen => {
88- // Stray closing parenthesis: output it as an atom.
89- ( Node :: Atom ( String :: from ( ")" ) ) , i + 1 )
90- }
79+ Token :: RParen => ( Node :: Atom ( String :: from ( ")" ) ) , i + 1 ) ,
9180 Token :: Atom ( ref s) => ( Node :: Atom ( s. clone ( ) ) , i + 1 ) ,
9281 }
9382}
9483
95- /// Parse all tokens into a vector of nodes.
9684fn parse_all ( tokens : & [ Token ] ) -> Vec < Node > {
9785 let mut nodes = Vec :: new ( ) ;
9886 let mut i = 0 ;
@@ -104,7 +92,6 @@ fn parse_all(tokens: &[Token]) -> Vec<Node> {
10492 nodes
10593}
10694
107- /// Returns a string with `indent` levels (2 spaces per level).
10895fn indent_str ( indent : usize ) -> String {
10996 let mut s = String :: new ( ) ;
11097 for _ in 0 ..indent {
@@ -113,20 +100,17 @@ fn indent_str(indent: usize) -> String {
113100 s
114101}
115102
116- /// Returns true if the given node is “flat” (an atom or a list whose children are all flat).
117103fn is_flat_node ( node : & Node ) -> bool {
118104 match node {
119105 Node :: Atom ( _) => true ,
120106 Node :: List ( children) => children. iter ( ) . all ( is_flat_node) ,
121107 }
122108}
123109
124- /// Returns true if every node in the slice is flat.
125110fn is_flat_list ( nodes : & [ Node ] ) -> bool {
126111 nodes. iter ( ) . all ( is_flat_node)
127112}
128113
129- /// Format a node “inline” (without inserting any newlines).
130114fn format_node_inline ( node : & Node ) -> String {
131115 match node {
132116 Node :: Atom ( s) => s. clone ( ) ,
@@ -147,7 +131,6 @@ fn format_node_inline(node: &Node) -> String {
147131 }
148132}
149133
150- /// If a node is flat then return its inline formatting.
151134#[ allow( dead_code) ]
152135fn format_inline ( node : & Node ) -> Option < String > {
153136 if is_flat_node ( node) {
@@ -157,8 +140,6 @@ fn format_inline(node: &Node) -> Option<String> {
157140 }
158141}
159142
160- /// Returns true if the first atom of a list node is one of these keywords.
161- /// Such nodes (like `(export ...)`, `(param ...)`, `(result ...)`) are inlined when part of a func signature.
162143fn is_inline_signature ( node : & Node ) -> bool {
163144 if let Node :: List ( children) = node {
164145 if let Some ( Node :: Atom ( ref keyword) ) = children. first ( ) {
@@ -168,9 +149,6 @@ fn is_inline_signature(node: &Node) -> bool {
168149 false
169150}
170151
171- /// A simple heuristic: returns true if the given token is an opcode.
172- /// In our case, an opcode is an atom that does not start with '$',
173- /// is not a numeric literal, and is not a string literal.
174152fn is_opcode ( token : & str ) -> bool {
175153 if token. starts_with ( '$' ) {
176154 return false ;
@@ -180,7 +158,6 @@ fn is_opcode(token: &str) -> bool {
180158 }
181159 let mut chars = token. chars ( ) ;
182160 if let Some ( first) = chars. next ( ) {
183- // Check for a numeric literal: allow an optional sign then digits.
184161 if ( first == '-' || first == '+' ) && chars. clone ( ) . all ( |c| c. is_ascii_digit ( ) ) {
185162 return false ;
186163 }
@@ -191,16 +168,13 @@ fn is_opcode(token: &str) -> bool {
191168 true
192169}
193170
194- /// Helper that groups consecutive instruction tokens (atoms) in a list.
195- /// For each opcode atom, it appends any following atoms that are not opcodes.
196171fn format_instructions ( nodes : & [ Node ] , indent : usize ) -> String {
197172 let mut result = String :: new ( ) ;
198173 let mut i = 0 ;
199174 while i < nodes. len ( ) {
200175 match & nodes[ i] {
201176 Node :: Atom ( token) => {
202177 if is_opcode ( token) {
203- // Start a new instruction line.
204178 let mut line = token. clone ( ) ;
205179 i += 1 ;
206180 while i < nodes. len ( ) {
@@ -220,15 +194,13 @@ fn format_instructions(nodes: &[Node], indent: usize) -> String {
220194 result. push_str ( & indent_str ( indent) ) ;
221195 result. push_str ( & line) ;
222196 } else {
223- // Non-opcode atom printed on its own line.
224197 result. push ( '\n' ) ;
225198 result. push_str ( & indent_str ( indent) ) ;
226199 result. push_str ( token) ;
227200 i += 1 ;
228201 }
229202 }
230203 Node :: List ( _) => {
231- // For a nested list, simply delegate to format_node.
232204 result. push ( '\n' ) ;
233205 result. push_str ( & indent_str ( indent) ) ;
234206 result. push_str ( & format_node ( & nodes[ i] , indent) ) ;
@@ -239,14 +211,6 @@ fn format_instructions(nodes: &[Node], indent: usize) -> String {
239211 result
240212}
241213
242- /// Recursively format a node with the given indent level.
243- ///
244- /// Special rules:
245- /// - A `(module …)` prints its children on new indented lines.
246- /// - A `(func …)` prints its signature groups inline and then uses `format_instructions`
247- /// for the remaining function body.
248- /// - A `(forall …)` is handled similarly, grouping its children (after the first "forall" atom)
249- /// as instructions.
250214fn format_node ( node : & Node , indent : usize ) -> String {
251215 match node {
252216 Node :: Atom ( s) => s. clone ( ) ,
@@ -271,28 +235,29 @@ fn format_node(node: &Node, indent: usize) -> String {
271235 } else if ident == "func" {
272236 let mut s = String :: new ( ) ;
273237 s. push ( '(' ) ;
274- // Always inline the first element ("func")
275238 s. push_str ( & format_node_inline ( & children[ 0 ] ) ) ;
276239 let mut i = 1 ;
277- // Inline any signature tokens like (export ...), (param ...), (result ...).
278240 while i < children. len ( ) {
279- if is_inline_signature ( & children[ i] ) {
280- s. push ( ' ' ) ;
281- s. push_str ( & format_node_inline ( & children[ i] ) ) ;
282- i += 1 ;
283- } else {
284- break ;
241+ match & children[ i] {
242+ Node :: Atom ( _) => {
243+ s. push ( ' ' ) ;
244+ s. push_str ( & format_node_inline ( & children[ i] ) ) ;
245+ i += 1 ;
246+ }
247+ Node :: List ( _) if is_inline_signature ( & children[ i] ) => {
248+ s. push ( ' ' ) ;
249+ s. push_str ( & format_node_inline ( & children[ i] ) ) ;
250+ i += 1 ;
251+ }
252+ _ => break ,
285253 }
286254 }
287- // Process the remaining children as the function body,
288- // grouping consecutive instruction tokens.
289255 s. push_str ( & format_instructions ( & children[ i..] , indent + 1 ) ) ;
290256 s. push ( '\n' ) ;
291257 s. push_str ( & indent_str ( indent) ) ;
292258 s. push ( ')' ) ;
293259 return s;
294260 } else if [ "forall" , "exists" , "assume" , "unique" ] . contains ( & ident. as_str ( ) ) {
295- // For a forall block, print the first atom inline then group its remaining children.
296261 let mut s = String :: new ( ) ;
297262 s. push ( '(' ) ;
298263 s. push_str ( ident) ;
@@ -303,7 +268,6 @@ fn format_node(node: &Node, indent: usize) -> String {
303268 return s;
304269 }
305270 }
306- // For any other list: if it is flat, print inline; otherwise, one element per line.
307271 if is_flat_list ( children) {
308272 format_node_inline ( node)
309273 } else {
@@ -329,10 +293,6 @@ fn format_node(node: &Node, indent: usize) -> String {
329293 }
330294}
331295
332- /// The public function to format unformatted WAT code.
333- ///
334- /// This function receives unformatted WAT as an `&str` and returns a formatted `String`.
335- /// Even if the input is malformed, the formatter does its best.
336296pub fn format ( input : & str ) -> String {
337297 let tokens = tokenize ( input) ;
338298 let nodes = parse_all ( & tokens) ;
0 commit comments