Skip to content

Commit e36984f

Browse files
feat: created stdarch-gen-wasm to create spec file for wasm32
intrinsics
1 parent 97bf36d commit e36984f

File tree

7 files changed

+546
-6
lines changed

7 files changed

+546
-6
lines changed

Cargo.lock

Lines changed: 62 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/stdarch-gen-wasm/Cargo.toml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[package]
2+
name = "stdarch-gen-wasm"
3+
version = "0.1.0"
4+
authors = ["Madhav Madhusoodanan <[email protected]>"]
5+
edition = "2024"
6+
7+
[dependencies]
8+
tree-sitter = "0.24"
9+
tree-sitter-rust = "0.23"
10+
tree-sitter-c = "0.23"
11+
clap = { version = "4.5.45", features = ["derive"] }
12+
13+
[build-dependencies]
14+
cc="*"

crates/stdarch-gen-wasm/src/cli.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
use clap::Parser;
2+
3+
#[derive(Parser, Debug)]
4+
#[command(version, about, long_about = None)]
5+
pub struct Args {
6+
/// The Rust source file path argument
7+
#[arg(short, long)]
8+
pub c: String,
9+
10+
/// The Rust source file path argument
11+
#[arg(short, long)]
12+
pub rust: String,
13+
}

crates/stdarch-gen-wasm/src/main.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
mod cli;
2+
mod matcher;
3+
mod structs;
4+
mod utils;
5+
6+
use std::fs;
7+
8+
use clap::Parser as ClapParser;
9+
use cli::Args;
10+
use tree_sitter::{Parser, Tree};
11+
12+
use crate::matcher::match_intrinsic_definitions;
13+
use crate::structs::{CIntrinsic, RustIntrinsic};
14+
use crate::utils::leaf_nodes_from_grammar_name;
15+
16+
/// Read the Rust source code and returns its AST
17+
fn process_rust_code(source: &String) -> (String, Tree) {
18+
let mut parser = Parser::new();
19+
parser
20+
.set_language(&tree_sitter_rust::LANGUAGE.into())
21+
.expect("Error loading Rust grammar");
22+
let source_code = fs::read_to_string(source);
23+
24+
if let Err(ref err) = source_code {
25+
eprintln!("Rust parsing error: {}", err);
26+
panic!()
27+
}
28+
let source_code_string = source_code.unwrap();
29+
let tree = parser.parse(source_code_string.clone(), None).unwrap();
30+
(source_code_string, tree)
31+
}
32+
33+
/// Reads the C source code and returns its AST
34+
fn process_c_code(source: &String) -> (String, Tree) {
35+
let mut parser = Parser::new();
36+
parser
37+
.set_language(&tree_sitter_c::LANGUAGE.into())
38+
.expect("Error loading Rust grammar");
39+
let source_code = fs::read_to_string(source);
40+
41+
if let Err(ref err) = source_code {
42+
eprintln!("C parsing error: {}", err);
43+
panic!()
44+
}
45+
let source_code_string = source_code.unwrap();
46+
let tree = parser.parse(source_code_string.clone(), None).unwrap();
47+
(source_code_string, tree)
48+
}
49+
50+
/// Creates an entry in the spec sheet that corresponds to a specific intrinsic
51+
fn generate_spec(c_intrinsic: &CIntrinsic, rust_intrinsic: &RustIntrinsic) -> String {
52+
format!(
53+
"/// {}
54+
c-intrinsic-name = {}
55+
c-arguments = {}
56+
c-arguments-data-types = {}
57+
c-return-type = {}
58+
rust-intrinsic-name = {}
59+
rust-arguments = {}
60+
rust-arguments-data-types = {}
61+
rust-const-generic-arguments = {}
62+
rust-const-generic-arguments-data-types = {}
63+
rust-return-type = {}",
64+
rust_intrinsic.intrinsic,
65+
c_intrinsic.intrinsic,
66+
c_intrinsic.arg_names.join(", "),
67+
c_intrinsic.arg_types.join(", "),
68+
c_intrinsic.return_type.unwrap_or(""),
69+
rust_intrinsic.intrinsic,
70+
rust_intrinsic.arg_names.join(", "),
71+
rust_intrinsic.arg_types.join(", "),
72+
rust_intrinsic.generic_arg_names.join(", "),
73+
rust_intrinsic.generic_arg_types.join(", "),
74+
rust_intrinsic.return_type.unwrap_or(""),
75+
)
76+
}
77+
78+
/// Create the spec sheet.
79+
///
80+
/// Fields that would be present in the spec sheet:
81+
/// 1. c-intrinsic-name
82+
/// 2. c-arguments
83+
/// 3. c-arguments-data-types
84+
/// 4. c-return-type
85+
/// 5. rust-intrinsic-name
86+
/// 6. rust-arguments
87+
/// 7. rust-arguments-data-types
88+
/// 8. rust-const-generic-arguments
89+
/// 9. rust-const-generic-arguments-data-types
90+
/// 10. rust-return-type
91+
fn main() {
92+
// Read the file-paths from CLI arguments
93+
// obtain the tree of tokens from the code
94+
let args = Args::parse();
95+
let (c_source, c_tree) = process_c_code(&args.c);
96+
let (rust_source, rust_tree) = process_rust_code(&args.rust);
97+
98+
let preproc_node = c_tree.root_node();
99+
100+
let c_intrinsics = leaf_nodes_from_grammar_name(preproc_node, "function_definition")
101+
.iter()
102+
.map(|&node| CIntrinsic::new(node, &c_source))
103+
.collect::<Vec<_>>();
104+
105+
let mut rust_cursor = rust_tree.root_node().walk();
106+
let rust_intrinsics = rust_tree
107+
.root_node()
108+
.children(&mut rust_cursor)
109+
.filter(|node| node.grammar_name() == "function_item")
110+
.map(|node| RustIntrinsic::new(node, &rust_source))
111+
.collect::<Vec<_>>();
112+
113+
let matching_intrinsics = match_intrinsic_definitions(&c_intrinsics, &rust_intrinsics);
114+
println!(
115+
"// This code is automatically generated. DO NOT MODIFY.
116+
// Number of matched intrinsics: {}\n",
117+
matching_intrinsics.len()
118+
);
119+
120+
let spec_details = matching_intrinsics
121+
.iter()
122+
.map(|&(c_intrinsic, rust_intrinsic)| generate_spec(c_intrinsic, rust_intrinsic))
123+
.collect::<Vec<_>>()
124+
.join("\n\n");
125+
println!("{}", spec_details);
126+
}
crates/stdarch-gen-wasm/src/matcher.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
use std::collections::HashSet;
2+
3+
use crate::structs::{CIntrinsic, RustIntrinsic};
4+
5+
/// Matches the set of intrinsics in Rust to their C counterpart.
6+
///
7+
/// This function assumes that the list of Rust definitions
8+
/// will be a subset of the list of definitions in C.
9+
pub fn match_intrinsic_definitions<'a>(
10+
c_definitions: &'a Vec<CIntrinsic>,
11+
rust_definitions: &'a Vec<RustIntrinsic>,
12+
) -> Vec<(&'a CIntrinsic<'a>, &'a RustIntrinsic<'a>)> {
13+
// This function assumes that the list of Rust definitions
14+
// will be a subset of the list of definitions in C
15+
16+
let mut matched_definitions: Vec<(&'a CIntrinsic, &'a RustIntrinsic)> = Vec::new();
17+
matched_definitions.reserve(rust_definitions.len());
18+
19+
for rust_definition in rust_definitions.iter() {
20+
let c_definition = c_definitions
21+
.iter()
22+
.find(|&c_def| match_intrinsic_definition(c_def.intrinsic, rust_definition.intrinsic));
23+
if let Some(c_def) = c_definition {
24+
matched_definitions.push((c_def, rust_definition));
25+
}
26+
}
27+
28+
matched_definitions
29+
}
30+
31+
/// Decides whether a C intrinsic name corresponds to a Rust intrinsic name.
///
/// Both names are split on underscores into keyword chunks. The names match
/// when every chunk of the Rust name also occurs among the chunks of the C
/// name; chunk order and extra C chunks are ignored.
///
/// Examples of the naming pattern this relies on:
/// - C `wasm_v128_load` and Rust `v128_load`
/// - C `wasm_i8x16_const_splat` and Rust `i8x16_splat`
fn match_intrinsic_definition(c_definition: &str, rust_definition: &str) -> bool {
    let c_chunks = c_definition.split('_').collect::<HashSet<&str>>();

    // Reject as soon as one Rust chunk has no counterpart in the C name.
    for rust_chunk in rust_definition.split('_') {
        if !c_chunks.contains(rust_chunk) {
            return false;
        }
    }
    true
}

0 commit comments

Comments
 (0)