Skip to content

Commit f731d86

Browse files
feat: allowing for multiple rust source files.
1 parent e36984f commit f731d86

File tree

6 files changed

+2190
-38
lines changed

6 files changed

+2190
-38
lines changed

crates/stdarch-gen-wasm/src/cli.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ pub struct Args {
99

1010
/// The Rust source file path argument
1111
#[arg(short, long)]
12-
pub rust: String,
12+
pub rust: Vec<String>,
1313
}

crates/stdarch-gen-wasm/src/main.rs

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::structs::{CIntrinsic, RustIntrinsic};
1414
use crate::utils::leaf_nodes_from_grammar_name;
1515

1616
/// Reads the Rust source code and returns its AST
17-
fn process_rust_code(source: &String) -> (String, Tree) {
17+
fn process_rust_code(source: String) -> (String, Tree) {
1818
let mut parser = Parser::new();
1919
parser
2020
.set_language(&tree_sitter_rust::LANGUAGE.into())
@@ -31,7 +31,7 @@ fn process_rust_code(source: &String) -> (String, Tree) {
3131
}
3232

3333
/// Reads the C source code and returns its AST
34-
fn process_c_code(source: &String) -> (String, Tree) {
34+
fn process_c_code(source: String) -> (String, Tree) {
3535
let mut parser = Parser::new();
3636
parser
3737
.set_language(&tree_sitter_c::LANGUAGE.into())
@@ -76,7 +76,7 @@ rust-return-type = {}",
7676
}
7777

7878
/// Create the spec sheet.
79-
///
79+
///
8080
/// Fields that would be present in the spec sheet:
8181
/// 1. c-intrinsic-name
8282
/// 2. c-arguments
@@ -92,22 +92,33 @@ fn main() {
9292
// Read the file-paths from CLI arguments
9393
// obtain the tree of tokens from the code
9494
let args = Args::parse();
95-
let (c_source, c_tree) = process_c_code(&args.c);
96-
let (rust_source, rust_tree) = process_rust_code(&args.rust);
97-
95+
let (c_source, c_tree) = process_c_code(args.c);
9896
let preproc_node = c_tree.root_node();
9997

10098
let c_intrinsics = leaf_nodes_from_grammar_name(preproc_node, "function_definition")
10199
.iter()
102100
.map(|&node| CIntrinsic::new(node, &c_source))
103101
.collect::<Vec<_>>();
104102

105-
let mut rust_cursor = rust_tree.root_node().walk();
106-
let rust_intrinsics = rust_tree
107-
.root_node()
108-
.children(&mut rust_cursor)
109-
.filter(|node| node.grammar_name() == "function_item")
110-
.map(|node| RustIntrinsic::new(node, &rust_source))
103+
let rust_intrinsics_interim = args
104+
.rust
105+
.into_iter()
106+
.map(|path| process_rust_code(path))
107+
.collect::<Vec<_>>();
108+
109+
let rust_intrinsics = rust_intrinsics_interim
110+
.iter()
111+
.map(|(rust_source, rust_tree)| {
112+
let rust_source_str = rust_source.as_str();
113+
let mut rust_cursor = rust_tree.root_node().walk();
114+
rust_tree
115+
.root_node()
116+
.children(&mut rust_cursor)
117+
.filter(|node| node.grammar_name() == "function_item")
118+
.map(|node| RustIntrinsic::new(node, rust_source_str))
119+
.collect::<Vec<_>>()
120+
})
121+
.flatten()
111122
.collect::<Vec<_>>();
112123

113124
let matching_intrinsics = match_intrinsic_definitions(&c_intrinsics, &rust_intrinsics);

crates/stdarch-gen-wasm/src/matcher.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::collections::HashSet;
33
use crate::structs::{CIntrinsic, RustIntrinsic};
44

55
/// Matches the set of intrinsics in Rust to their C counterpart.
6-
///
6+
///
77
/// This function assumes that the list of Rust definitions
88
/// will be a subset of the list of definitions in C.
99
pub fn match_intrinsic_definitions<'a>(
@@ -33,10 +33,10 @@ pub fn match_intrinsic_definitions<'a>(
3333
fn match_intrinsic_definition(c_definition: &str, rust_definition: &str) -> bool {
3434
// Most intrinsics in C are of the format: `wasm_v128_load`.
3535
// Its Rust counterpart is named `v128_load`.
36-
//
36+
//
3737
// Another one is `wasm_i8x16_const_splat`, and its Rust counterpart is `i8x16_splat`.
38-
//
39-
// The pattern that is observed is that, each keyword "chunk" that constructs
38+
//
39+
// The pattern that is observed is that, each keyword "chunk" that constructs
4040
// the intrinsic name in Rust will also be used to construct the intrinsic name in C.
4141
// These names are constructed by joining the chunks with an underscore (_).
4242

crates/stdarch-gen-wasm/src/structs.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@ pub struct CIntrinsic<'a> {
2222
}
2323

2424
impl<'a> CIntrinsic<'a> {
25-
pub fn new(node: Node, source: &'a String) -> Self {
25+
pub fn new(node: Node, source: &'a str) -> Self {
2626
// Take an intrinsic definition for example:
27-
//
27+
//
2828
// static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_make(uint32_t __c0, uint32_t __c1, uint32_t __c2, uint32_t __c3) {...}
29-
//
29+
//
3030
// For a C intrinsic, the immediate children
3131
// would have their grammar names as:
32-
//
32+
//
3333
// "storage_class_specifier" (which is `static`)
3434
// "storage_class_specifier" (which is `__inline__`)
3535
// "identifier" (which is `v128_t`. The parser doesn't recognize that it is a type, instead thinks that it is an identifier)
@@ -54,7 +54,7 @@ impl<'a> CIntrinsic<'a> {
5454

5555
// The immediate children of the `function_declarator` node would have
5656
// their grammar as follows:
57-
//
57+
//
5858
// "identifier" (which is the intrinsic name)
5959
// "parameter_list" (which is the arguments to the intrinsic)
6060
let declarator_node = node
@@ -64,27 +64,27 @@ impl<'a> CIntrinsic<'a> {
6464

6565
// The immediate children of a `parameter_list` node would have
6666
// their grammar as follows (assuming 2 arguments):
67-
//
67+
//
6868
// "(" -> The opening bracket that denotes the start of the arguments definition
6969
// "parameter_declaration" -> The definition for the first argument
7070
// "," -> The comma that separates the first and the second arguments
7171
// "parameter_declaration" -> The definition for the second argument
7272
// ")" -> The closing bracket that denotes the end of the arguments definition
73-
//
73+
//
7474
// Each node with grammar name as `parameter_declaration` could have their children as
7575
// (in case of `int x`):
7676
// 1. "primitive_type" -> Points to `int`
7777
// 2. "identifier" -> Points to `x`
78-
//
78+
//
7979
// or have (in case of `v128_t x`):
8080
// 1. "identifier" -> Points to `v128_t` which is actually a type (but the parser is unaware of it)
8181
// 2. "identifier" -> Points to `x`
82-
//
82+
//
8383
// or have (in case of `const void *__mem`):
8484
// 1. "type_qualifier" -> Points to `const`
8585
// 2. "primitive_type" -> Points to `void`
8686
// 3. "pointer_declarator" -> breaks down into "*" and "identifier" (which is `__mem`)
87-
//
87+
//
8888
let intrinsic_name = source
8989
.get(
9090
declarator_node
@@ -108,9 +108,9 @@ impl<'a> CIntrinsic<'a> {
108108
// Since the type could be identified as either `primitive_type`, `identifier`,
109109
// or a combination of `type_qualifier`, `primitive_type` and `*` (in the case of "const void *")
110110
// this approach first calculates the end index (which is right before the start of an argument variable)
111-
//
111+
//
112112
// And then searches backwards until it finds a break (either a comma
113-
// or the opening bracket). The entire portion contained within this range
113+
// or the opening bracket). The entire portion contained within this range
114114
// is then considered as the type of the argument.
115115
let end_index = arg_name_node.byte_range().start;
116116
let start_index = source
@@ -149,10 +149,10 @@ impl<'a> CIntrinsic<'a> {
149149
}
150150

151151
impl<'a> RustIntrinsic<'a> {
152-
pub fn new(node: Node, source: &'a String) -> Self {
152+
pub fn new(node: Node, source: &'a str) -> Self {
153153
// For a Rust intrinsic, the immediate children
154154
// would have their grammar names as:
155-
//
155+
//
156156
// 1. "visibility_modifier" (for `pub`)
157157
// 2. "function_modifiers" (for `unsafe`. May not always be present)
158158
// 3. "fn" (the actual keyword `fn`)
@@ -162,7 +162,7 @@ impl<'a> RustIntrinsic<'a> {
162162
// 7. "->" (the arrow used to specify return type)
163163
// 8. "identifier" (the return type of the function)
164164
// 9. "block" (the body of the function)
165-
//
165+
//
166166
let mut cursor = node.walk();
167167
let intrinsic_name = source
168168
.get(
@@ -196,14 +196,13 @@ impl<'a> RustIntrinsic<'a> {
196196
if let Some(generic_args) = generic_args {
197197
// The children of this node have their grammar_names as the following
198198
// (assuming 2 generic arguments):
199-
//
199+
//
200200
// "<" (The opening angle bracket that starts the generic arguments definition)
201201
// "const_parameter" (The first const generic argument)
202202
// "," (The comma that denotes the end of definition of the first const generic argument)
203203
// "const_parameter" (The second const generic argument)
204204
// ">" (The closing angle bracket that concludes the generic arguments definition)
205-
//
206-
205+
//
207206
(generic_arg_names, generic_arg_types) = generic_args
208207
.children(&mut cursor)
209208
.filter(|arg| arg.grammar_name() == "const_parameter")
@@ -225,13 +224,12 @@ impl<'a> RustIntrinsic<'a> {
225224
if let Some(args) = args {
226225
// The children of this node have their grammar_names as the following
227226
// (assuming 2 generic arguments):
228-
//
227+
//
229228
// "(" (The opening circular bracket that starts the arguments definition)
230229
// "parameter" (The first argument)
231230
// "," (The comma that denotes the end of definition of the first argument)
232231
// "parameter" (The second argument)
233232
// ")" (The closing circular bracket that concludes the arguments definition)
234-
//
235233
(arg_names, arg_types) = args
236234
.children(&mut cursor)
237235
.filter(|arg| arg.grammar_name() == "parameter")

crates/stdarch-gen-wasm/src/utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use tree_sitter::Node;
22

3-
/// Recursively searches the node and its children for a node
3+
/// Recursively searches the node and its children for a node
44
/// that matches its grammar name, using Depth-first search.
55
pub fn leaf_nodes_from_grammar_name<'a>(node: Node<'a>, name: &str) -> Vec<Node<'a>> {
66
if node.grammar_name() == name {

0 commit comments

Comments
 (0)