diff --git a/Cargo.lock b/Cargo.lock index 313f75d..319947b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,21 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "aho-corasick" version = "1.1.3" @@ -67,12 +82,36 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +dependencies = [ + "backtrace", +] + [[package]] name = "autocfg" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -168,8 +207,11 @@ dependencies = [ "bytes", "lazy_static", "ouroboros", + "thiserror", "tree-sitter", + "tree-sitter-java", "tree-sitter-javascript", + "tree-sitter-json", "tree-sitter-python", "tree-sitter-typescript", ] @@ -199,7 +241,9 @@ dependencies = [ "codegen-sdk-cst-generator", "codegen-sdk-macros", "convert_case", + "env_logger", "ouroboros", + "rayon", "tempfile", "tree-sitter", ] @@ -208,6 +252,7 @@ dependencies = [ name = "codegen-sdk-cst-generator" version = "0.1.0" dependencies = [ + "anyhow", "codegen-sdk-common", "convert_case", "log", @@ -366,6 +411,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + [[package]] name = "glob" version = "0.3.2" @@ -492,6 +543,15 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "miniz_oxide" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +dependencies = [ + "adler2", +] + [[package]] name = "ntapi" version = "0.4.1" @@ -501,6 +561,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.20.3" @@ -718,6 +787,12 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustc-hash" version = "1.1.0" @@ -897,6 +972,26 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "thiserror" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "tree-sitter" version = "0.25.1" @@ -911,6 +1006,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-javascript" version = "0.23.1" @@ -921,6 +1026,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-json" +version = "0.24.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d727acca406c0020cffc6cf35516764f36c8e3dc4408e5ebe2cb35a947ec471" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-language" version = "0.1.4" diff --git a/Cargo.toml b/Cargo.toml index e3f8299..a2c1579 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,10 +9,10 @@ codegen-sdk-analyzer = { path = "codegen-sdk-analyzer" } codegen-sdk-cst = { path = "codegen-sdk-cst" , features = ["typescript", "javascript", "tsx", "jsx"]} codegen-sdk-common = { path = "codegen-sdk-common" } crossbeam = "0.8.4" -env_logger = "0.11.6" glob = "0.3.2" +env_logger = { workspace = true } log = { workspace = true } -rayon = "1.10.0" +rayon = { workspace = true} sysinfo = "0.33.1" [workspace] members = [ @@ -23,12 +23,16 @@ members = [ "codegen-sdk-cst-generator", "codegen-sdk-macros", ] [workspace.dependencies] +rayon = "1.10.0" +env_logger = "0.11.6" log = "0.4.25" ouroboros = "0.18.5" tree-sitter = "0.25.1" tree-sitter-python = "0.23.6" tree-sitter-typescript = "0.23.2" tree-sitter-javascript = "0.23.1" +tree-sitter-json = "0.24.0" +tree-sitter-java = "0.23.5" bytes = "1.10.0" convert_case = "0.7.1" serde = { version = "1.0.217", features = ["derive"] } diff --git a/codegen-sdk-common/Cargo.toml b/codegen-sdk-common/Cargo.toml index 6e64a6f..23dbadd 100644 --- a/codegen-sdk-common/Cargo.toml +++ b/codegen-sdk-common/Cargo.toml @@ -10,7 +10,13 @@ ouroboros = { workspace = true } tree-sitter-python = { workspace = true, optional = true } tree-sitter-typescript = { workspace = true, optional = true } tree-sitter-javascript = { workspace = true, optional = true } +tree-sitter-json = { workspace = true, optional = true } +tree-sitter-java = { workspace = true, optional = true } lazy_static = "1.5.0" +thiserror = "2.0.11" [features] python = ["dep:tree-sitter-python"] +json = ["dep:tree-sitter-json"] +java = ["dep:tree-sitter-java"] typescript = ["dep:tree-sitter-typescript", "dep:tree-sitter-javascript"] +all = ["python", "typescript", "json", "java"] diff --git a/codegen-sdk-common/src/errors.rs b/codegen-sdk-common/src/errors.rs new file mode 100644 index 0000000..e94e6b5 --- /dev/null +++ b/codegen-sdk-common/src/errors.rs @@ -0,0 +1,22 @@ +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ParseError { + #[error("TreeSitter error: {0}")] + TreeSitter(#[from] tree_sitter::LanguageError), + #[error("Unknown Language")] + UnknownLanguage, + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("UTF-8 error: {0}")] + Utf8(#[from] std::string::FromUtf8Error), + #[error("Missing Required Field '{field_name}' in node of type '{parent_node}'")] + MissingNode { + field_name: String, + parent_node: String, + }, + #[error("Miscelaneous error")] + Miscelaneous, + #[error("Unexpected Node Type {node_type}")] + UnexpectedNode { node_type: String }, +} diff --git a/codegen-sdk-common/src/language.rs b/codegen-sdk-common/src/language.rs index d652dcb..8858c0e 100644 --- a/codegen-sdk-common/src/language.rs +++ b/codegen-sdk-common/src/language.rs @@ -1,5 +1,5 @@ -use tree_sitter::{LanguageError, Parser}; - +use crate::errors::ParseError; +use tree_sitter::Parser; pub struct Language { pub name: &'static str, pub struct_name: &'static str, @@ -8,15 +8,18 @@ pub struct Language { pub tree_sitter_language: tree_sitter::Language, } impl Language { - pub fn parse_tree_sitter(&self, content: &str) -> Result { + pub fn parse_tree_sitter(&self, content: &str) -> Result { let mut parser = Parser::new(); parser.set_language(&self.tree_sitter_language)?; - let tree = parser.parse(content, None).unwrap(); - Ok(tree) + parser.parse(content, None).ok_or(ParseError::Miscelaneous) } } +#[cfg(feature = "java")] +pub mod java; #[cfg(feature = "typescript")] pub mod javascript; +#[cfg(feature = "json")] +pub mod json; #[cfg(feature = "typescript")] pub mod jsx; #[cfg(feature = "python")] @@ -37,5 +40,9 @@ lazy_static! { &jsx::JSX, #[cfg(feature = "typescript")] &javascript::Javascript, + #[cfg(feature = "json")] + &json::JSON, + #[cfg(feature = "java")] + &java::Java, ]; } diff --git a/codegen-sdk-common/src/language/java.rs b/codegen-sdk-common/src/language/java.rs new file mode 100644 index 0000000..f9edf50 --- /dev/null +++ b/codegen-sdk-common/src/language/java.rs @@ -0,0 +1,10 @@ +use super::Language; +lazy_static! { + pub static ref Java: Language = Language { + name: "java", + struct_name: "Java", + node_types: tree_sitter_java::NODE_TYPES, + file_extensions: &["java"], + tree_sitter_language: tree_sitter_java::LANGUAGE.into(), + }; +} diff --git a/codegen-sdk-common/src/language/json.rs b/codegen-sdk-common/src/language/json.rs new file mode 100644 index 0000000..a5d7b75 --- /dev/null +++ b/codegen-sdk-common/src/language/json.rs @@ -0,0 +1,10 @@ +use super::Language; +lazy_static! { + pub static ref JSON: Language = Language { + name: "json", + struct_name: "JSON", + node_types: tree_sitter_json::NODE_TYPES, + file_extensions: &["json"], + tree_sitter_language: tree_sitter_json::LANGUAGE.into(), + }; +} diff --git a/codegen-sdk-common/src/lib.rs b/codegen-sdk-common/src/lib.rs index 5558197..48a156a 100644 --- a/codegen-sdk-common/src/lib.rs +++ b/codegen-sdk-common/src/lib.rs @@ -1,5 +1,10 @@ +mod errors; pub mod language; pub mod traits; pub mod utils; +pub use errors::ParseError; +pub use language::Language; +pub use traits::*; +pub use utils::*; #[macro_use] extern crate lazy_static; diff --git a/codegen-sdk-common/src/traits.rs b/codegen-sdk-common/src/traits.rs index 2c5476c..fba5dec 100644 --- a/codegen-sdk-common/src/traits.rs +++ b/codegen-sdk-common/src/traits.rs @@ -1,7 +1,8 @@ +use crate::errors::ParseError; use bytes::Bytes; use tree_sitter::{self, Point}; -pub trait FromNode { - fn from_node(node: tree_sitter::Node) -> Self; +pub trait FromNode: Sized { + fn from_node(node: tree_sitter::Node) -> Result; } pub trait CSTNode: Send { fn start_byte(&self) -> usize; diff --git a/codegen-sdk-common/src/utils.rs b/codegen-sdk-common/src/utils.rs index a1c9e92..5273a77 100644 --- a/codegen-sdk-common/src/utils.rs +++ b/codegen-sdk-common/src/utils.rs @@ -1,14 +1,51 @@ use bytes::{Bytes, BytesMut}; use tree_sitter::{self}; -pub fn named_children_without_field_names(node: tree_sitter::Node) -> Vec { + +use crate::{traits::FromNode, ParseError}; +pub fn named_children_without_field_names( + node: tree_sitter::Node, +) -> Result, ParseError> { let mut children = Vec::new(); for (index, child) in node.named_children(&mut node.walk()).enumerate() { if node.field_name_for_named_child(index as u32).is_none() { - children.push(child); + children.push(T::from_node(child)?); } } - children + Ok(children) } + pub fn get_text_from_node(node: tree_sitter::Node) -> Bytes { BytesMut::zeroed(node.end_byte() - node.start_byte()).into() } +pub fn get_optional_child_by_field_name( + node: &tree_sitter::Node, + field_name: &str, +) -> Result, ParseError> { + if let Some(child) = node.child_by_field_name(field_name) { + return Ok(Some(T::from_node(child)?)); + } + Ok(None) +} +pub fn get_child_by_field_name( + node: &tree_sitter::Node, + field_name: &str, +) -> Result { + if let Some(child) = get_optional_child_by_field_name(node, field_name)? { + return Ok(child); + } + Err(ParseError::MissingNode { + field_name: field_name.to_string(), + parent_node: node.kind().to_string(), + }) +} + +pub fn get_multiple_children_by_field_name( + node: &tree_sitter::Node, + field_name: &str, +) -> Result, ParseError> { + let mut children = Vec::new(); + for child in node.children_by_field_name(field_name, &mut node.walk()) { + children.push(T::from_node(child)?); + } + Ok(children) +} diff --git a/codegen-sdk-cst-generator/Cargo.toml b/codegen-sdk-cst-generator/Cargo.toml index 5f42fdc..46758c9 100644 --- a/codegen-sdk-cst-generator/Cargo.toml +++ b/codegen-sdk-cst-generator/Cargo.toml @@ -13,5 +13,6 @@ syn = "2.0.98" tree-sitter = { workspace = true } log = { workspace = true } codegen-sdk-common = { path = "../codegen-sdk-common" } +anyhow = { version = "1.0.95", features = ["backtrace"] } [dev-dependencies] codegen-sdk-common = { path = "../codegen-sdk-common" , features = ["python"] } diff --git a/codegen-sdk-cst-generator/src/generator.rs b/codegen-sdk-cst-generator/src/generator.rs index afbdce9..3849440 100644 --- a/codegen-sdk-cst-generator/src/generator.rs +++ b/codegen-sdk-cst-generator/src/generator.rs @@ -2,7 +2,7 @@ use crate::parser::Node; use enum_generator::generate_enum; use naming::normalize_type_name; use state::State; -use std::{collections::HashSet, error::Error}; +use std::collections::HashSet; use struct_generator::generate_struct; mod enum_generator; mod format; @@ -10,14 +10,13 @@ mod naming; mod state; mod struct_generator; const IMPORTS: &str = " -use codegen_sdk_common::traits::*; use tree_sitter::{self, Point}; extern crate ouroboros; -use codegen_sdk_common::utils::*; +use codegen_sdk_common::*; use bytes::Bytes; "; -pub(crate) fn generate_cst(node_types: &Vec) -> Result> { +pub(crate) fn generate_cst(node_types: &Vec) -> anyhow::Result { let mut state = State::default(); let mut nodes = HashSet::new(); for node in node_types { @@ -50,8 +49,15 @@ pub(crate) fn generate_cst(node_types: &Vec) -> Result return Ok(formatted), + Err(e) => { + log::error!("Failed to format CST: {}", e); + return Ok(result.to_string()); + } + } } + #[cfg(test)] mod tests { use crate::parser::parse_node_types; diff --git a/codegen-sdk-cst-generator/src/generator/enum_generator.rs b/codegen-sdk-cst-generator/src/generator/enum_generator.rs index bbc5706..387c598 100644 --- a/codegen-sdk-cst-generator/src/generator/enum_generator.rs +++ b/codegen-sdk-cst-generator/src/generator/enum_generator.rs @@ -20,7 +20,7 @@ fn get_cases( } else if !existing_cases.contains(&t.type_name) { existing_cases.push(t.type_name.clone()); cases.push_str(&format!( - "\"{}\" => {}({variant_name}::from_node(node)),", + "\"{}\" => Ok({}({variant_name}::from_node(node)?)),", t.type_name, prefix, )); } @@ -63,16 +63,21 @@ pub fn generate_enum( continue; } let normalized_name = normalize_string(name); - cases.push_str(&format!("\"{}\" => Self::Anonymous,\n", normalized_name,)); + cases.push_str(&format!( + "\"{}\" => Ok(Self::Anonymous),\n", + normalized_name + )); } } state.enums.push_str(&format!( " impl FromNode for {enum_name} {{ - fn from_node(node: tree_sitter::Node) -> Self {{ + fn from_node(node: tree_sitter::Node) -> Result {{ match node.kind() {{ {cases} - _ => panic!(\"Unexpected node type: {{}}\", node.kind()), + _ => Err(ParseError::UnexpectedNode {{ + node_type: node.kind().to_string(), + }}), }} }} }} diff --git a/codegen-sdk-cst-generator/src/generator/format.rs b/codegen-sdk-cst-generator/src/generator/format.rs index 04b7b91..20bc99f 100644 --- a/codegen-sdk-cst-generator/src/generator/format.rs +++ b/codegen-sdk-cst-generator/src/generator/format.rs @@ -1,10 +1,4 @@ -pub fn format_cst(cst: &str) -> String { - let parsed = syn::parse_str::(cst) - .map_err(|e| { - log::error!("{:#?}", e); - e - }) - .unwrap(); - - prettyplease::unparse(&parsed) +pub fn format_cst(cst: &str) -> anyhow::Result { + let parsed = syn::parse_str::(cst)?; + Ok(prettyplease::unparse(&parsed)) } diff --git a/codegen-sdk-cst-generator/src/generator/struct_generator.rs b/codegen-sdk-cst-generator/src/generator/struct_generator.rs index 8cd085c..563e9f0 100644 --- a/codegen-sdk-cst-generator/src/generator/struct_generator.rs +++ b/codegen-sdk-cst-generator/src/generator/struct_generator.rs @@ -43,15 +43,15 @@ impl HasChildren for {{name}} { } } impl FromNode for {{name}} { - fn from_node(node: tree_sitter::Node) -> Self { - Self { + fn from_node(node: tree_sitter::Node) -> Result { + Ok(Self { start_byte: node.start_byte(), end_byte: node.end_byte(), start_position: node.start_position(), end_position: node.end_position(), text: Box::new(get_text_from_node(node)), {{fields}} - } + }) } } "; @@ -88,7 +88,11 @@ fn generate_multiple_field( " pub {field_name}: Vec<{}>,\n", converted_type_name )); - constructor_fields.push(format!(" {field_name}: node.children_by_field_name(\"{name}\", &mut node.walk()).map(|node| {converted_type_name}::from_node(node)).collect()", field_name = field_name, converted_type_name = converted_type_name, name=original_name)); + constructor_fields.push(format!( + " {field_name}: get_multiple_children_by_field_name(&node, \"{name}\")?", + field_name = field_name, + name = original_name + )); } fn generate_required_field( field_name: &str, @@ -102,7 +106,11 @@ fn generate_required_field( field_name = field_name, type_name = converted_type_name )); - constructor_fields.push(format!(" {field_name}: {converted_type_name}::from_node(node.child_by_field_name(\"{name}\").unwrap()).into()", field_name = field_name, converted_type_name = converted_type_name, name=original_name)); + constructor_fields.push(format!( + " {field_name}: Box::new(get_child_by_field_name(&node, \"{name}\")?)", + field_name = field_name, + name = original_name + )); } fn generate_optional_field( field_name: &str, @@ -116,7 +124,11 @@ fn generate_optional_field( field_name = field_name, type_name = converted_type_name )); - constructor_fields.push(format!(" {field_name}: node.child_by_field_name(\"{name}\").map(|node| {converted_type_name}::from_node(node)).into()", field_name = field_name, converted_type_name = converted_type_name, name=original_name)); + constructor_fields.push(format!( + " {field_name}: Box::new(get_optional_child_by_field_name(&node, \"{name}\")?)", + field_name = field_name, + name = original_name + )); } fn generate_fields( fields: &Fields, @@ -163,7 +175,8 @@ fn generate_children( ) -> String { let converted_type_name = convert_type_definition(&children.types, state, node_name, "children"); - constructor_fields.push(format!(" children: named_children_without_field_names(node).into_iter().map(|node| {converted_type_name}::from_node(node)).collect()", converted_type_name = converted_type_name)); + constructor_fields.push(" children: named_children_without_field_names(node)?".to_string()); + converted_type_name } pub fn generate_struct(node: &Node, state: &mut State, name: &str) { @@ -185,10 +198,9 @@ pub fn generate_struct(node: &Node, state: &mut State, name: &str) { .structs .push_str(&format!(" pub children: Vec<{}>,\n", children_type_name)); state.structs.push_str(FOOTER_TEMPLATE); - state.structs.push_str( - &CONSTRUCTOR_TEMPLATE - .replace("{{fields}}", &constructor_fields.join(",\n ")) - .replace("{{name}}", name) - .replace("{{children}}", &children_type_name), - ); + let constructor = &CONSTRUCTOR_TEMPLATE + .replace("{{fields}}", &constructor_fields.join(",\n ")) + .replace("{{name}}", name) + .replace("{{children}}", &children_type_name); + state.structs.push_str(&constructor); } diff --git a/codegen-sdk-cst-generator/src/lib.rs b/codegen-sdk-cst-generator/src/lib.rs index 19bbc6e..9ec9433 100644 --- a/codegen-sdk-cst-generator/src/lib.rs +++ b/codegen-sdk-cst-generator/src/lib.rs @@ -1,12 +1,11 @@ mod generator; pub(crate) mod parser; use codegen_sdk_common::language::Language; -use std::error::Error; -pub fn generate_cst(language: &Language) -> Result<(), Box> { +pub fn generate_cst(language: &Language) -> anyhow::Result<()> { let node_types = parser::parse_node_types(language)?; let cst = generator::generate_cst(&node_types)?; - let out_dir = std::env::var("OUT_DIR").unwrap(); + let out_dir = std::env::var("OUT_DIR")?; let out_file = format!("{}/{}.rs", out_dir, language.name); std::fs::write(out_file, cst)?; Ok(()) diff --git a/codegen-sdk-cst-generator/src/parser.rs b/codegen-sdk-cst-generator/src/parser.rs index c6aabf2..2d5593e 100644 --- a/codegen-sdk-cst-generator/src/parser.rs +++ b/codegen-sdk-cst-generator/src/parser.rs @@ -1,5 +1,3 @@ -use std::error::Error; - use codegen_sdk_common::language::Language; use serde::{Deserialize, Serialize}; @@ -45,7 +43,7 @@ pub struct Children { pub types: Vec, } -pub fn parse_node_types(language: &Language) -> Result, Box> { +pub fn parse_node_types(language: &Language) -> anyhow::Result> { let parsed: Vec = serde_json::from_str(language.node_types)?; Ok(parsed) } diff --git a/codegen-sdk-cst/Cargo.toml b/codegen-sdk-cst/Cargo.toml index b92a602..8414278 100644 --- a/codegen-sdk-cst/Cargo.toml +++ b/codegen-sdk-cst/Cargo.toml @@ -13,7 +13,8 @@ convert_case = { workspace = true } [build-dependencies] codegen-sdk-cst-generator = { path = "../codegen-sdk-cst-generator"} codegen-sdk-common = { path = "../codegen-sdk-common", features = ["python", "typescript"] } - +rayon = { workspace = true } +env_logger = { workspace = true } [dev-dependencies] tempfile = "3.16.0" [features] @@ -22,3 +23,6 @@ typescript = [ "codegen-sdk-common/typescript"] tsx = [ "codegen-sdk-common/typescript"] jsx = [ "codegen-sdk-common/typescript"] javascript = [ "codegen-sdk-common/typescript"] +json = [ "codegen-sdk-common/json"] +java = [ "codegen-sdk-common/java"] +default = ["typescript", "java", "tsx", "jsx", "javascript", "java"] diff --git a/codegen-sdk-cst/build.rs b/codegen-sdk-cst/build.rs index 46a6802..d505c9a 100644 --- a/codegen-sdk-cst/build.rs +++ b/codegen-sdk-cst/build.rs @@ -1,8 +1,9 @@ use codegen_sdk_common::language::LANGUAGES; use codegen_sdk_cst_generator::generate_cst; - +use rayon::prelude::*; fn main() { - for language in LANGUAGES.iter() { + env_logger::init(); + LANGUAGES.par_iter().for_each(|language| { generate_cst(language).unwrap(); - } + }); } diff --git a/codegen-sdk-cst/src/lib.rs b/codegen-sdk-cst/src/lib.rs index e45b31b..7c83edd 100644 --- a/codegen-sdk-cst/src/lib.rs +++ b/codegen-sdk-cst/src/lib.rs @@ -1,31 +1,32 @@ -use std::{ - error::Error, - fmt::{self, Display}, - path::PathBuf, -}; +use std::path::PathBuf; use codegen_sdk_common::{ language::Language, traits::{CSTNode, FromNode}, + ParseError, }; use codegen_sdk_macros::{include_language, parse_language}; pub trait CSTLanguage { type Program: CSTNode + FromNode + Send; fn language() -> &'static Language; - fn parse(content: &str) -> Result> { + fn parse(content: &str) -> Result { let tree = Self::language().parse_tree_sitter(content)?; - Ok(Self::Program::from_node(tree.root_node())) + Self::Program::from_node(tree.root_node()) } - fn parse_file(file_path: &PathBuf) -> Result> { + fn parse_file(file_path: &PathBuf) -> Result { let content = std::fs::read_to_string(file_path)?; Self::parse(&content) } - fn should_parse(file_path: &PathBuf) -> bool { - Self::language() - .file_extensions - .contains(&file_path.extension().unwrap().to_str().unwrap()) + fn should_parse(file_path: &PathBuf) -> Result { + Ok(Self::language().file_extensions.contains( + &file_path + .extension() + .ok_or(ParseError::Miscelaneous)? + .to_str() + .ok_or(ParseError::Miscelaneous)?, + )) } } include_language!(python); @@ -33,21 +34,17 @@ include_language!(typescript); include_language!(tsx); include_language!(jsx); include_language!(javascript); -#[derive(Debug)] -struct ParseError {} -impl Error for ParseError {} -impl Display for ParseError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "ParseError") - } -} -pub fn parse_file(file_path: &PathBuf) -> Result, Box> { +include_language!(json); +include_language!(java); +pub fn parse_file(file_path: &PathBuf) -> Result, ParseError> { parse_language!(python); parse_language!(typescript); parse_language!(tsx); parse_language!(jsx); parse_language!(javascript); - Err(Box::new(ParseError {})) + parse_language!(json); + parse_language!(java); + Err(ParseError::UnknownLanguage) } #[cfg(test)] diff --git a/codegen-sdk-macros/Cargo.toml b/codegen-sdk-macros/Cargo.toml index a8525b7..340258d 100644 --- a/codegen-sdk-macros/Cargo.toml +++ b/codegen-sdk-macros/Cargo.toml @@ -4,6 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -codegen-sdk-common = { path = "../codegen-sdk-common", features = ["typescript", "python"] } +codegen-sdk-common = { path = "../codegen-sdk-common", features = ["all"] } [lib] proc-macro = true diff --git a/codegen-sdk-macros/src/lib.rs b/codegen-sdk-macros/src/lib.rs index bb923e9..91558b8 100644 --- a/codegen-sdk-macros/src/lib.rs +++ b/codegen-sdk-macros/src/lib.rs @@ -41,7 +41,7 @@ pub fn parse_language(_item: TokenStream) -> TokenStream { let language = get_language(&target_language); format!( "#[cfg(feature = \"{name}\")] - if {name}::{struct_name}::should_parse(file_path) {{ + if {name}::{struct_name}::should_parse(file_path)? {{ let parsed = {name}::{struct_name}::parse_file(file_path)?; return Ok(Box::new(parsed)); }} diff --git a/src/main.rs b/src/main.rs index b46c5d1..5105684 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use clap::Parser; use codegen_sdk_common::{language::LANGUAGES, traits::CSTNode}; use glob::glob; use rayon::prelude::*; -use std::{panic::catch_unwind, path, time::Instant}; +use std::{path, time::Instant}; use sysinfo::System; #[derive(Debug, Parser)] struct Args { @@ -14,10 +14,14 @@ fn get_memory() -> u64 { current.memory() } fn collect_files(dir: String) -> Vec { - glob(&format!("{}/**/*.ts*", dir)) - .unwrap() - .filter_map(|file| file.ok()) - .collect() + let mut files = Vec::new(); + for language in LANGUAGES.iter() { + for extension in language.file_extensions.iter() { + files.extend(glob(&format!("{dir}**/*.{}", extension)).unwrap()); + } + } + + files.into_iter().filter_map(|file| file.ok()).collect() } fn parse_file( file: &path::PathBuf, @@ -26,18 +30,15 @@ fn parse_file( if file.is_dir() { return None; } - let result = catch_unwind(|| codegen_sdk_cst::parse_file(file)); + let result = codegen_sdk_cst::parse_file(file); return match result { - Ok(Ok(program)) => Some(program), - Ok(Err(e)) => { + Ok(program) => Some(program), + Err(e) => { + log::error!("Error parsing file {}: {}", file.display(), e); tx.send(e.to_string()).unwrap(); None } - Err(_) => { - tx.send("".to_string()).unwrap(); - None - } }; } fn log_languages() {