Skip to content

Commit 8a0fe70

Browse files
Merge pull request #6 from deepsourcelabs/fix-unicode-handling
fix: bug with possible Unicode char-boundary failure
2 parents a3dd26b + c929e51 commit 8a0fe70

File tree

7 files changed

+47
-34
lines changed

7 files changed

+47
-34
lines changed

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ members = [
77
[package]
88
name = "dracula"
99
description = "🧛 Count-ing lines, AH AH AHH!"
10-
version = "0.1.0"
10+
version = "0.1.2"
1111
authors = ["Swarnim Arun <[email protected]>"]
1212
edition = "2021"
1313
license-file = "LICENSE"

cdracula/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "cdracula"
33
description = "🧛 Count-ing lines, AH AH AHH!"
4-
version = "0.1.0"
4+
version = "0.1.2"
55
authors = ["Swarnim Arun <[email protected]>"]
66
edition = "2021"
77

cdracula/tests/test_capi.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ mod python {
3737
let src = CString::from_vec_unchecked(
3838
(String::from(
3939
r#"
40+
# entp için anayzer
41+
if index == 10:
42+
pass
4043
# skip this
4144
def python():
4245
"""
@@ -47,7 +50,7 @@ def python():
4750
) + "\0")
4851
.into(),
4952
);
50-
assert_eq!(get_meaningful_line_count(src.as_ptr(), PYTHON_LANG, 0), 2);
53+
assert_eq!(get_meaningful_line_count(src.as_ptr(), PYTHON_LANG, 0), 4);
5154
}
5255
}
5356

pydracula/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "pydracula"
3-
version = "0.1.0"
3+
version = "0.1.2"
44
authors = ["Swarnim Arun <[email protected]>"]
55
edition = "2018"
66

src/parse.rs

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ pub struct Parser<'a, L: Language> {
279279
src: &'a str,
280280
index: usize,
281281
language_items: &'static [ParseItem],
282-
_marker: PhantomData<L>
282+
_marker: PhantomData<L>,
283283
}
284284

285285
// mostly this is only used in tests atm!
@@ -290,7 +290,7 @@ impl<L: Language> Parser<'_, L> {
290290
src,
291291
language_items: L::PARSE_ITEMS,
292292
index: 0,
293-
_marker: PhantomData::default()
293+
_marker: PhantomData::default(),
294294
}
295295
}
296296

@@ -304,35 +304,40 @@ impl<L: Language> Parser<'_, L> {
304304
.find_map(|i| Some((i, items[i].begin().matches(src)?)))
305305
.and_then(|(i, matches)| {
306306
(matches[2].end..src.len()).find_map(|b| {
307-
Some((
308-
i,
309-
b,
310-
if items[i].is_key_matched() {
311-
items[i].end().matches_with_key(
312-
&src[b..],
313-
&src[matches[1].start..matches[1].end],
314-
)?
315-
} else {
316-
items[i].end().matches(&src[b..])?
317-
},
318-
))
307+
if src.is_char_boundary(b) {
308+
Some((
309+
i,
310+
b,
311+
if items[i].is_key_matched() {
312+
items[i].end().matches_with_key(
313+
&src[b..],
314+
&src[matches[1].start..matches[1].end],
315+
)?
316+
} else {
317+
items[i].end().matches(&src[b..])?
318+
},
319+
))
320+
} else {
321+
None
322+
}
319323
})
320324
})
321325
{
322-
Ok(items[i].to_parse_output(&src[0..b + end_matches[2].end]))
326+
Ok(items[i].to_parse_output(&src[..b + end_matches[2].end]))
323327
} else if let Some(end) = (1..=src.len()).find(|&idx| {
324-
idx == src.len()
325-
|| src[idx..].starts_with('\n')
326-
|| items
327-
.iter()
328-
.find_map(|i| i.begin().matches(&src[idx..]))
329-
.is_some()
328+
src.is_char_boundary(idx)
329+
&& (idx == src.len()
330+
|| src[idx..].starts_with('\n')
331+
|| items
332+
.iter()
333+
.find_map(|i| i.begin().matches(&src[idx..]))
334+
.is_some())
330335
}) {
331336
// if it's not a range then it's a source line
332337
if end == 0 {
333338
Err("Failed to parse, for some random reason, pls lookie here")?;
334339
}
335-
Ok(ParseOutput::Source(&src[0..end]))
340+
Ok(ParseOutput::Source(&src[..end]))
336341
} else {
337342
Err("Failed to parse the rest.")?
338343
}

src/tests.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ mod simple_python {
3939
#[test]
4040
fn try_parse() {
4141
let parsed = Parser::<Python>::new(
42-
r#"# some top level comments
42+
r#"# entp için anayzer
43+
if index == 10:
44+
pass
45+
# some top level comments
4346
def main():
4447
print("s");"""
4548
Multi-line Comments
@@ -53,6 +56,7 @@ mod simple_python {
5356
let mut line_count: usize = 0;
5457
let mut stack = vec![];
5558
for p in parsed {
59+
eprintln!("{:?}", p);
5660
if matches!(p, ParseOutput::EOL(_) | ParseOutput::EOF) {
5761
if stack.iter().any(|i| match i {
5862
ParseOutput::Source(s) => Python::is_meaningful_src(s),
@@ -65,7 +69,7 @@ mod simple_python {
6569
stack.push(p);
6670
}
6771
}
68-
assert_eq!(line_count, 3)
72+
assert_eq!(line_count, 5)
6973
}
7074
}
7175

@@ -86,10 +90,11 @@ mod simple_rust {
8690
// the platform to be present.
8791
fn main() {
8892
let c = 2;
89-
/* Multi-Line Comments
93+
/* Multi-Line Comments यह काम करना चाहिए
9094
seems to work as well */
91-
let src = "hello, World!";
92-
let src2 = r#"hello, World!"#;
95+
let यह = "hello, World!";
96+
let src = "Gello, World!";
97+
let src2 = r#"यह, काम!"#;
9398
return 0;
9499
}
95100
"##,

0 commit comments

Comments
 (0)