Skip to content

Commit 477b6ed

Browse files
committed
make Reader functional (at least for ASCII)
1 parent 0d25a8c commit 477b6ed

File tree

3 files changed

+176
-107
lines changed

3 files changed

+176
-107
lines changed

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
mod reader;
12
mod validator;
23

34
pub use validator::EcmaRegexValidator;

src/reader.rs

Lines changed: 97 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,137 +1,91 @@
11
// Copyright (C) 2020 Quentin M. Kniep <[email protected]>
22
// Distributed under terms of the MIT license.
33

4-
const legacyImpl = {
5-
at(s: string, end: number, i: number): number {
6-
return i < end ? s.charCodeAt(i) : -1
7-
},
8-
width(c: number): number {
9-
return 1
10-
},
11-
}
12-
13-
const unicodeImpl = {
14-
at(s: string, end: number, i: number): number {
15-
return i < end ? s.codePointAt(i)! : -1
16-
},
17-
width(c: number): number {
18-
return c > 0xffff ? 2 : 1
19-
},
20-
}
4+
use std::collections::VecDeque;
215

22-
/// Cursor over a regular-expression source string with a small lookahead window.
///
/// The reader buffers up to [`Reader::LOOKAHEAD`] upcoming code points so that
/// `eat`, `eat2` and `eat3` can match short sequences without re-reading the source.
///
/// Only ASCII input is decoded faithfully for now: every source byte is surfaced
/// as one code point (see `at`).
#[derive(Debug)]
pub struct Reader {
    /// Whether the `u` flag was passed to the most recent `reset`.
    unicode: bool,
    /// Owned copy of the source being read.
    src: String,
    /// Offset into `src` of the current (front-of-window) code point.
    index: usize,
    /// Exclusive upper bound; offsets at or beyond it read as end of input.
    end: usize,
    /// Buffered upcoming code points, current one at the front.
    cps: VecDeque<char>,
    /// Width of each buffered code point; always the same length as `cps`.
    widths: VecDeque<usize>,
}

impl Default for Reader {
    fn default() -> Self {
        Self::new()
    }
}

impl Reader {
    /// Maximum number of code points kept in the lookahead window.
    const LOOKAHEAD: usize = 4;

    /// Creates a reader over the empty string; call `reset` to load a source.
    pub fn new() -> Self {
        Self {
            unicode: false,
            src: String::new(),
            index: 0,
            end: 0,
            cps: VecDeque::with_capacity(Self::LOOKAHEAD),
            widths: VecDeque::with_capacity(Self::LOOKAHEAD),
        }
    }

    /// Returns the source string loaded by the last `reset`.
    pub fn source(&self) -> &str {
        &self.src
    }

    /// Returns the offset of the current code point.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Returns the buffered code point `offset` positions ahead of the current one
    /// (`0` is the current code point), or `None` past the end of the window/input.
    pub fn code_point_with_offset(&self, offset: usize) -> Option<&char> {
        self.cps.get(offset)
    }

    /// Loads `source`, restricts reads to offsets below `end`, and positions the
    /// reader at `start`. `u_flag` selects unicode mode.
    pub fn reset(&mut self, source: &str, start: usize, end: usize, u_flag: bool) {
        self.unicode = u_flag;
        self.src = source.into();
        self.end = end;
        self.rewind(start);
    }

    /// Moves the reader to `index` and refills the lookahead window from there.
    pub fn rewind(&mut self, index: usize) {
        self.index = index;
        self.cps.clear();
        self.widths.clear();
        let mut pos = index;
        while self.cps.len() < Self::LOOKAHEAD {
            match self.at(pos) {
                Some(c) => {
                    pos += self.push_lookahead(c);
                }
                None => break,
            }
        }
    }

    /// Consumes the current code point, if any, and pulls the next one into the window.
    pub fn advance(&mut self) {
        let consumed = match self.widths.pop_front() {
            Some(w) => w,
            // Window is empty: already at end of input, nothing to consume.
            None => return,
        };
        self.cps.pop_front();
        self.index += consumed;
        // The next unread code point sits just past everything still buffered.
        let next = self.index + self.widths.iter().sum::<usize>();
        if let Some(c) = self.at(next) {
            self.push_lookahead(c);
        }
    }

    /// Consumes the current code point if it equals `cp`; returns whether it did.
    pub fn eat(&mut self, cp: char) -> bool {
        self.eat_sequence(&[cp])
    }

    /// Consumes the next two code points if they equal `cp1`, `cp2` in order.
    /// Nothing is consumed on a mismatch.
    pub fn eat2(&mut self, cp1: char, cp2: char) -> bool {
        self.eat_sequence(&[cp1, cp2])
    }

    /// Consumes the next three code points if they equal `cp1`, `cp2`, `cp3` in order.
    /// Nothing is consumed on a mismatch.
    pub fn eat3(&mut self, cp1: char, cp2: char, cp3: char) -> bool {
        self.eat_sequence(&[cp1, cp2, cp3])
    }

    /// Consumes `expected.len()` code points if the window starts with `expected`.
    fn eat_sequence(&mut self, expected: &[char]) -> bool {
        let matched = expected
            .iter()
            .enumerate()
            .all(|(offset, cp)| self.cps.get(offset) == Some(cp));
        if matched {
            for _ in 0..expected.len() {
                self.advance();
            }
        }
        matched
    }

    /// Appends `c` and its own width to the window; returns that width.
    fn push_lookahead(&mut self, c: char) -> usize {
        let w = self.width(c);
        self.cps.push_back(c);
        self.widths.push_back(w);
        w
    }

    /// Reads the code point at offset `i`, or `None` when `i` is at/after `end`
    /// or beyond the actual source length.
    fn at(&self, i: usize) -> Option<char> {
        if i >= self.end {
            return None;
        }
        // TODO: decode non-ASCII input (UTF-16 code units without the `u` flag,
        // whole code points with it). Until then each byte maps to one `char`.
        self.src.as_bytes().get(i).copied().map(char::from)
    }

    /// Width of `c`: 2 in unicode mode for code points above U+FFFF, otherwise 1.
    fn width(&self, c: char) -> usize {
        if self.unicode && c > '\u{FFFF}' {
            2
        } else {
            1
        }
    }
}
153136

154137
#[cfg(test)]
155138
mod tests {
156139
use super::*;
157140

158141
#[test]
159142
fn eat_test() {
160-
let reader = Reader::new();
143+
let mut reader = Reader::new();
161144
reader.reset("abcdefghijk", 0, 11, true);
162145
assert_eq!(reader.eat('a'), true);
163146
assert_eq!(reader.eat3('b', 'd', 'd'), false);
@@ -168,4 +151,26 @@ mod tests {
168151
assert_eq!(reader.eat2('h', 'i'), true);
169152
assert_eq!(reader.eat3('j', 'k', 'a'), false);
170153
}
154+
155+
#[test]
fn rewind_test() {
    // Consuming the whole input and rewinding to the start must replay identically.
    let mut reader = Reader::new();
    reader.reset("abcd", 0, 4, true);
    assert!(reader.eat('a'));
    // A failed multi-code-point match must not consume anything.
    assert!(!reader.eat3('b', 'd', 'd'));
    assert!(reader.eat3('b', 'c', 'd'));
    reader.rewind(0);
    assert!(reader.eat('a'));
    assert!(!reader.eat3('b', 'd', 'd'));
    assert!(reader.eat3('b', 'c', 'd'));
}
167+
168+
/*#[test]
169+
fn at_test_es_compliance() {
170+
let mut reader = Reader::new();
171+
reader.reset("􀃃a🩢☃★♲", 0, 20, false);
172+
assert_eq!(reader.at(0).unwrap() as u32, 56256);
173+
reader.reset("􀃃a🩢☃★♲", 0, 20, true);
174+
assert_eq!(reader.at(0).unwrap() as u32, 1048771);
175+
}*/
171176
}

0 commit comments

Comments
 (0)