Skip to content

Commit 76f9f12

Browse files
feat: add support for proper parsing to dracula (#15)
* feat: add support for multiple new languages to be parsed * feat: use tree-sitter for parsing grammars to allow for slower but more accurate results * feat: add support for meaningful lines to v2 also add support for Typescript and TSX * add: support for new languages to pydracula * fix: improve range match handling * add tests * add js, jsx and ts tests * chore: improve pydracula bindings * fix: as per suggestion Signed-off-by: Swarnim Arun <[email protected]> * add: indices matching tests --------- Signed-off-by: Swarnim Arun <[email protected]>
1 parent 1582216 commit 76f9f12

File tree

20 files changed

+1131
-244
lines changed

20 files changed

+1131
-244
lines changed

Cargo.lock

Lines changed: 179 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
[workspace]
2-
members = [
3-
"cdracula",
4-
"pydracula"
5-
]
2+
members = ["cdracula", "pydracula"]
63

74
[workspace.package]
85
edition = "2021"
@@ -30,10 +27,25 @@ ra_ap_syntax = "0.0.149"
3027
pretty_assertions = "1.3.0"
3128
letr = "0.2.1"
3229

30+
[dependencies]
31+
tree-sitter = "0.20.10"
32+
tree-sitter-rust = "0.20.3"
33+
tree-sitter-java = "0.20.0"
34+
tree-sitter-javascript = "0.20.0"
35+
tree-sitter-typescript = "0.20.2"
36+
tree-sitter-python = "0.20.2"
37+
tree-sitter-ruby = "0.20.0"
38+
tree-sitter-c-sharp = "0.20.0"
39+
tree-sitter-c = "0.20.2"
40+
tree-sitter-cpp = "0.20.0"
41+
tree-sitter-go = "0.19.1"
42+
tree-sitter-scala = { git = "https://github.com/tree-sitter/tree-sitter-scala", rev = "7d348f51e442563f4ab2b6c3e136dac658649f93" }
43+
tree-sitter-kotlin = { git = "https://github.com/swarnimarun/tree-sitter-kotlin", branch = "add-field-name" }
44+
3345
[dependencies.log]
3446
version = "0.4"
3547
optional = true
3648

3749
[features]
3850
log = ["dep:log"]
39-
cli = ["log"]
51+
cli = ["log"]

pydracula/src/lib.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,23 @@ enum Lang {
99
Java,
1010
}
1111

12+
#[pyclass]
13+
#[derive(Debug, Clone, Copy)]
14+
enum Language {
15+
Python,
16+
Rust,
17+
C,
18+
Cpp,
19+
Java,
20+
Typescript,
21+
Tsx,
22+
Javascript,
23+
Jsx,
24+
Scala,
25+
CSharp,
26+
Ruby,
27+
}
28+
1229
impl Lang {
1330
fn get_meaningful_line_indices(&self, src: &str) -> Vec<usize> {
1431
use dracula::count::*;
@@ -62,6 +79,35 @@ fn get_meaningful_line_indices(lang: Lang, src: &str) -> Vec<usize> {
6279
lang.get_meaningful_line_indices(src)
6380
}
6481

82+
#[pyfunction]
83+
/// This function gets the list of lines that can be assumed to be meaningful/executable
84+
/// from a test-coverage or similar standpoint.
85+
///
86+
/// Further returns the list of line indexes starting from index 1.
87+
///
88+
/// If the parsing fails, then this returns None
89+
fn get_lines_with_executable_code(lang: Language, src: &str) -> Option<Vec<usize>> {
90+
use dracula::parse::v2::*;
91+
let treesitter_lang = match lang {
92+
Language::Python => TreeSitterLanguage::Python,
93+
Language::Rust => TreeSitterLanguage::Rust,
94+
Language::C => TreeSitterLanguage::C,
95+
Language::Cpp => TreeSitterLanguage::Cpp,
96+
Language::Java => TreeSitterLanguage::Java,
97+
Language::Typescript => TreeSitterLanguage::Typescript,
98+
Language::Tsx => TreeSitterLanguage::TSX,
99+
Language::Javascript | Language::Jsx => TreeSitterLanguage::Javascript,
100+
// currently untested consider waiting for proper tests to be merged before using
101+
Language::Scala => TreeSitterLanguage::Scala,
102+
Language::CSharp => TreeSitterLanguage::CSharp,
103+
Language::Ruby => TreeSitterLanguage::Ruby,
104+
Language::Go => TreeSitterLanguage::Go,
105+
};
106+
Parser::new(treesitter_lang)
107+
.and_then(|parser| parser.non_executable_src_spans(src))
108+
.map(|spans| get_lines_without_ranges(src, &spans))
109+
}
110+
65111
#[pyfunction]
66112
fn get_cleaned_source_code(lang: Lang, src: &str) -> String {
67113
lang.get_cleaned_source_code(src)
@@ -78,6 +124,7 @@ fn pydracula(_py: Python<'_>, m: &types::PyModule) -> PyResult<()> {
78124
m.add_function(wrap_pyfunction!(get_meaningful_line_indices, m)?)?;
79125
m.add_function(wrap_pyfunction!(get_cleaned_source_code, m)?)?;
80126
m.add_function(wrap_pyfunction!(get_count_of_meaningful_lines, m)?)?;
127+
m.add_function(wrap_pyfunction!(get_lines_with_executable_code, m)?)?;
81128
m.add_class::<Lang>()?;
82129
Ok(())
83130
}

rust-toolchain.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[toolchain]
2+
channel = "nightly"

0 commit comments

Comments
 (0)