Skip to content

Commit e3bbb70

Browse files
authored
Merge pull request #65 from tinaun/unicode-fix
Fix CharacterSet::any to match non-ASCII chars
2 parents 35e62df + ca45c1d commit e3bbb70

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

src/state_machine.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,23 @@ use std::{collections::HashMap, u64};
77
pub(crate) struct CharacterSet {
88
low_mask: u64,
99
high_mask: u64,
10+
any: bool,
1011
}
1112

1213
impl CharacterSet {
1314
pub(crate) fn new() -> Self {
1415
Self {
1516
low_mask: 0,
1617
high_mask: 0,
18+
any: false,
1719
}
1820
}
1921

2022
pub(crate) fn any() -> Self {
2123
Self {
2224
low_mask: u64::MAX,
2325
high_mask: u64::MAX,
26+
any: true,
2427
}
2528
}
2629

@@ -72,7 +75,7 @@ impl CharacterSet {
7275
let bit = 1 << val - 64;
7376
self.high_mask & bit != 0
7477
}
75-
_ => false,
78+
_ => self.any,
7679
}
7780
}
7881

@@ -234,7 +237,7 @@ impl<T> StateMachine<T> {
234237
pub(crate) fn process<'m>(&'m self, input: &'m str) -> Option<Match<'m, T>> {
235238
let mut traversals = vec![Traversal::new()];
236239

237-
for (i, ch) in input.chars().enumerate() {
240+
for (i, ch) in input.char_indices() {
238241
let next_traversals = self.process_char(traversals, ch, i);
239242
traversals = next_traversals;
240243

0 commit comments

Comments
 (0)