Skip to content

Commit 2af1b85

Browse files
swarnim-deepsourcedeepsource-autofix[bot]raghav-deepsource
authored
feat: add c_api for dracula & minor refactoring (#4)
* feat: add `capi` for line-munch & minor refactoring * Autofix issues in 1 file Resolved issues in cdracula/src/lib.rs via DeepSource Autofix * fix: for DeepSource * fix: formatting and incomplete bits of code * fix: cleanup code * fix: bug & refactor apis * Apply suggestions from code review Signed-off-by: raghav-deepsource <[email protected]> * fix: cleanup code * chore: fix tests & move asserts to only debug builds * remove: `log` crate * chore: cleanup code with macros & update interfaces --------- Signed-off-by: raghav-deepsource <[email protected]> Co-authored-by: deepsource-autofix[bot] <62050782+deepsource-autofix[bot]@users.noreply.github.com> Co-authored-by: raghav-deepsource <[email protected]>
1 parent 925ae24 commit 2af1b85

File tree

12 files changed

+604
-86
lines changed

12 files changed

+604
-86
lines changed

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
1+
[workspace]
2+
members = [
3+
"cdracula"
4+
]
5+
16
[package]
27
name = "dracula"
38
description = "🧛 Count-ing lines, AH AH AHH!"
49
version = "0.1.0"
5-
author = "Swarnim Arun <[email protected]>"
10+
authors = ["Swarnim Arun <[email protected]>"]
611
edition = "2021"
712
license-file = "LICENSE"
813
documentation = "https://docs.rs/dracula"

cdracula/Cargo.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[package]
2+
name = "cdracula"
3+
description = "🧛 Count-ing lines, AH AH AHH!"
4+
version = "0.1.0"
5+
authors = ["Swarnim Arun <[email protected]>"]
6+
edition = "2021"
7+
8+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
9+
[lib]
10+
crate-type = ["cdylib"]
11+
12+
[dependencies]
13+
dracula = { path = ".." }

cdracula/src/lib.rs

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
//! The functions provided here are wrappers around the `dracula` crate.
2+
#![deny(improper_ctypes_definitions)]
3+
#![allow(non_upper_case_globals)]
4+
5+
mod util_macros;
6+
7+
use dracula::parse::ParseOutput;
8+
use std::ffi::{self, c_char};
9+
10+
// Numeric identifiers of the supported languages; these are the values accepted by the
// `lang` argument of the exported functions in this file.
// The `languages_supported!` macro (see `util_macros`) turns every entry into a
// `c_uint` constant and generates the `get_parser`, `is_meaningful` and
// `is_meaningful_src` lookup functions that compare `lang` against these constants.
languages_supported! {
11+
const Python = 1;
12+
const C = 2;
13+
const Rust = 3;
14+
const Java = 4;
15+
}
16+
17+
/// Unmangled symbol holding the identifier of the Python language (`Python`).
#[no_mangle]
18+
pub static PYTHON_LANG: ffi::c_uint = Python;
19+
/// Unmangled symbol holding the identifier of the C language (`C`).
#[no_mangle]
20+
pub static C_LANG: ffi::c_uint = C;
21+
/// Unmangled symbol holding the identifier of the Rust language (`Rust`).
#[no_mangle]
22+
pub static RUST_LANG: ffi::c_uint = Rust;
23+
/// Unmangled symbol holding the identifier of the Java language (`Java`).
#[no_mangle]
24+
pub static JAVA_LANG: ffi::c_uint = Java;
25+
26+
#[no_mangle]
27+
/// This function is used to get the count of meaningful lines in the source.
28+
///
29+
/// It currently doesn't support setting the multiple ways(`kind`) of meaningful line
30+
/// search eg. `ignore whitespace`, `specific character`, etc.
31+
/// aka the definition of a meaningful line.
32+
/// But provided as field to avoid ABI incompatibility later.
33+
pub unsafe fn get_meaningful_line_count(
34+
src: *const c_char,
35+
lang: ffi::c_uint,
36+
_kind: ffi::c_uint,
37+
) -> ffi::c_ulonglong {
38+
let Some(parser) = get_parser(lang) else {
39+
return ffi::c_ulonglong::MAX
40+
};
41+
let Some(is_meaningful) = is_meaningful(lang) else {
42+
return ffi::c_ulonglong::MAX
43+
};
44+
let cstr = ffi::CStr::from_ptr(src);
45+
cstr.to_str()
46+
.map(|src| {
47+
let parsed = parser(src);
48+
let mut line_count: usize = 0;
49+
let mut stack = vec![];
50+
for p in parsed {
51+
if matches!(p, ParseOutput::EOL(_) | ParseOutput::EOF) {
52+
if stack.iter().any(is_meaningful) {
53+
line_count += 1;
54+
}
55+
// We clear the stack once we reach the end of a line.
56+
stack.clear();
57+
} else {
58+
// we accumulate tokens we see as meaningful tokens for the language.
59+
stack.push(p);
60+
}
61+
}
62+
line_count
63+
})
64+
.unwrap_or_default() as _
65+
}
66+
67+
#[no_mangle]
68+
/// This function is used to get the list of meaningful lines in the source.
69+
///
70+
/// It currently doesn't support setting the multiple ways(`kind`) of meaningful line
71+
/// search eg. `ignore whitespace`, `specific character`, etc.
72+
/// aka the definition of a meaningful line.
73+
/// But provided as field to avoid ABI incompatibility later.
74+
///
75+
/// NOTE:
76+
/// The caller is responsible for free'ing the obtained array
77+
pub unsafe fn meaningful_lines(
78+
src: *const c_char,
79+
lang: ffi::c_uint,
80+
_kind: ffi::c_uint,
81+
r_lines_len: *mut ffi::c_ulonglong,
82+
) -> *mut ffi::c_ulonglong {
83+
let Some(parser) = get_parser(lang) else {
84+
return std::ptr::null_mut()
85+
};
86+
let Some(is_meaningful) = is_meaningful(lang) else {
87+
return std::ptr::null_mut()
88+
};
89+
let mut meaningful_lines = Vec::<ffi::c_ulonglong>::new();
90+
if cfg!(dbg) {
91+
assert!(!src.is_null());
92+
}
93+
let cstr = ffi::CStr::from_ptr(src);
94+
_ = cstr.to_str().map(|src| {
95+
let mut parsed = parser(src);
96+
let lines = src.split_inclusive('\n').enumerate();
97+
struct Span {
98+
start: usize,
99+
end: usize,
100+
}
101+
let mut line_span = Span { start: 0, end: 0 };
102+
let mut parse_span = Span { start: 0, end: 0 };
103+
let mut last_parsed_output = None;
104+
for (idx, line) in lines {
105+
// setup line start and end
106+
line_span.start = line_span.end;
107+
line_span.end += line.len();
108+
// traverse parsed output until the span end is reached
109+
let mut po_stack = if let Some(po) = last_parsed_output {
110+
vec![po]
111+
} else {
112+
vec![]
113+
};
114+
while parse_span.end < line_span.end {
115+
if let Some(parsed_output) = parsed.next() {
116+
// setup parsed start and end
117+
parse_span.start = parse_span.end;
118+
parse_span.end += parsed_output.len();
119+
po_stack.push(parsed_output);
120+
}
121+
}
122+
if po_stack.iter().any(is_meaningful) {
123+
meaningful_lines.push(idx as u64);
124+
}
125+
if cfg!(dbg) {
126+
eprintln!("{} == {}", idx, po_stack.iter().any(is_meaningful));
127+
eprintln!("{:?}", po_stack);
128+
eprintln!("-------------------------------------------------");
129+
}
130+
if parse_span.end != line_span.end {
131+
last_parsed_output = po_stack.pop();
132+
} else {
133+
last_parsed_output = None;
134+
}
135+
}
136+
});
137+
meaningful_lines.shrink_to_fit();
138+
if cfg!(dbg) {
139+
assert!(meaningful_lines.len() == meaningful_lines.capacity());
140+
}
141+
let ptr = meaningful_lines.as_mut_ptr();
142+
let len = meaningful_lines.len();
143+
*r_lines_len = len as _;
144+
std::mem::forget(meaningful_lines); // prevent deallocation in Rust
145+
ptr
146+
}
147+
148+
#[no_mangle]
149+
/// This function is used to get the source of just the meaningful parts in the source,
150+
/// including the whitespaces.
151+
///
152+
/// It currently doesn't support setting the multiple ways(`kind`) of meaningful line
153+
/// search eg. `ignore whitespace`, `specific character`, etc.
154+
/// aka the definition of a meaningful line.
155+
/// But provided as field to avoid ABI incompatibility later.
156+
///
157+
/// NOTE:
158+
/// The caller is responsible for free'ing the obtained array
159+
pub unsafe fn get_cleaned_src(
160+
src: *const c_char,
161+
lang: ffi::c_uint,
162+
_kind: ffi::c_uint,
163+
_exclude: ffi::c_uint,
164+
) -> *mut i8 {
165+
let Some(parser) = get_parser(lang) else {
166+
return std::ptr::null_mut()
167+
};
168+
let Some(is_meaningful_src) = is_meaningful_src(lang) else {
169+
return std::ptr::null_mut()
170+
};
171+
let cstr = ffi::CStr::from_ptr(src);
172+
let src = cstr
173+
.to_str()
174+
.map(|src| {
175+
let parsed = parser(src);
176+
let mut meaningful_src = String::default();
177+
let mut stack = vec![];
178+
for p in parsed {
179+
if matches!(p, ParseOutput::EOL(_) | ParseOutput::EOF) {
180+
let meaningful_src_len = meaningful_src.len();
181+
for po in stack.iter() {
182+
if let ParseOutput::Source(s) = po {
183+
if is_meaningful_src(s) {
184+
meaningful_src.push_str(s);
185+
}
186+
}
187+
}
188+
if matches!(p, ParseOutput::EOL(_))
189+
&& meaningful_src_len != meaningful_src.len()
190+
{
191+
meaningful_src.push('\n');
192+
}
193+
stack.clear();
194+
} else {
195+
stack.push(p);
196+
}
197+
}
198+
meaningful_src
199+
})
200+
.unwrap_or_default();
201+
ffi::CString::from_vec_unchecked(src.into()).into_raw()
202+
}

cdracula/src/util_macros.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/// Declares the language identifiers known to this crate and generates the lookup
/// functions that map an identifier to the matching `dracula` language support.
///
/// Each `const Name = <number>;` entry expands to a `std::ffi::c_uint` constant called
/// `Name`. The same `Name` is used as a path segment under `dracula::langs`, so it has to
/// name an item there. Three private functions are generated; each one returns `None`
/// when the given identifier matches none of the declared constants.
#[macro_export]
macro_rules! languages_supported {
    ($(const $name:ident = $num:literal;)+) => {
        $(
            const $name: std::ffi::c_uint = $num;
        )+

        /// Looks up the parser constructor of the language identified by `lang`.
        fn get_parser(lang: std::ffi::c_uint) -> Option<fn(&str) -> dracula::parse::Parser> {
            use dracula::parse::Language;
            match lang {
                $(
                    id if id == $name => Some(dracula::langs::$name::get_parser()),
                )+
                _ => None,
            }
        }

        /// Looks up the predicate that tells whether a parsed token is meaningful in the
        /// language identified by `lang`.
        fn is_meaningful(lang: std::ffi::c_uint) -> Option<fn(&dracula::parse::ParseOutput) -> bool> {
            use dracula::parse::Language;
            match lang {
                $(
                    id if id == $name => Some(dracula::langs::$name::is_meaningful()),
                )+
                _ => None,
            }
        }

        /// Looks up the predicate that tells whether a piece of source text is meaningful
        /// in the language identified by `lang`.
        fn is_meaningful_src(lang: std::ffi::c_uint) -> Option<fn(&str) -> bool> {
            use dracula::parse::Language;
            match lang {
                $(
                    id if id == $name => Some(dracula::langs::$name::is_meaningful_src),
                )+
                _ => None,
            }
        }
    };
}

0 commit comments

Comments
 (0)