Skip to content

Commit 62f92ee

Browse files
committed
work on validator
1 parent ee4550f commit 62f92ee

File tree

2 files changed

+109
-17
lines changed

2 files changed

+109
-17
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,15 @@
88
Rust port of regexpp (ECMAScript regular expression parser).
99
This crate is being developed mainly as an efficient validator of ECMAScript regular expressions for
1010
the [`deno_lint` project](https://github.com/denoland/deno_lint).
11+
12+
## Using the Library
13+
14+
TBA
15+
16+
## Performance
17+
18+
TBA
19+
20+
## License
21+
22+
Released under the [MIT License](LICENSE).

src/validator.rs

Lines changed: 97 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ pub struct EcmaRegexValidator {
2424
ecma_version: EcmaVersion,
2525
u_flag: bool,
2626
n_flag: bool,
27+
num_capturing_parens: u32,
28+
group_names: HashSet<String>,
29+
backreference_names: HashSet<String>,
2730
}
2831

2932
impl Deref for EcmaRegexValidator {
@@ -47,6 +50,9 @@ impl EcmaRegexValidator {
4750
ecma_version,
4851
u_flag: false,
4952
n_flag: false,
53+
num_capturing_parens: 0,
54+
group_names: HashSet::new(),
55+
backreference_names: HashSet::new(),
5056
}
5157
}
5258

@@ -102,33 +108,107 @@ impl EcmaRegexValidator {
102108
/// Pattern[U, N]::
103109
/// Disjunction[?U, ?N]
104110
/// ```
105-
fn consume_pattern(&self) {
111+
fn consume_pattern(&mut self) {
106112
let start = self.index();
107-
/*self.num_capturing_parens = this.countCapturingParens()
108-
self.group_names.clear()
109-
self.backreference_names.clear()
113+
self.num_capturing_parens = self.count_capturing_parens();
114+
self.group_names.clear();
115+
self.backreference_names.clear();
110116

111117
//self.onPatternEnter(start)
112-
self.consumeDisjunction()
118+
self.consume_disjunction();
113119

114-
if let Some(cp) = self.current_code_point {
120+
if let Some(&cp) = self.code_point_with_offset(0) {
115121
if cp == ')' {
116-
this.raise("Unmatched ')'");
122+
//this.raise("Unmatched ')'");
117123
}
118-
if (cp == '\\') {
119-
this.raise("\\ at end of pattern");
124+
if cp == '\\' {
125+
//this.raise("\\ at end of pattern");
120126
}
121-
if (cp == ']' || cp == '}') {
122-
this.raise("Lone quantifier brackets");
127+
if cp == ']' || cp == '}' {
128+
//this.raise("Lone quantifier brackets");
123129
}
124-
this.raise("Unexpected character {}", cp);
130+
//this.raise("Unexpected character {}", cp);
125131
}
126-
for name in self.backreference_names {
127-
if !this._groupNames.has(name) {
128-
this.raise("Invalid named capture referenced")
132+
133+
for name in &self.backreference_names {
134+
if !self.group_names.contains(name) {
135+
//this.raise("Invalid named capture referenced")
129136
}
130-
}*/
131-
//self.onPatternLeave(start, this.index)
137+
}
138+
//self.onPatternLeave(start, self.index());
139+
}
140+
141+
/// Validate the next characters as a RegExp `Disjunction` production.
142+
/// ```grammar
143+
/// Disjunction[U, N]::
144+
/// Alternative[?U, ?N]
145+
/// Alternative[?U, ?N] `|` Disjunction[?U, ?N]
146+
/// ```
147+
fn consume_disjunction(&mut self) {
148+
let start = self.index();
149+
let mut i = 0;
150+
151+
//self.onDisjunctionEnter(start);
152+
self.consume_alternative(i);
153+
while self.eat('|') {
154+
i += 1;
155+
self.consume_alternative(i);
156+
}
157+
158+
//if self.consume_quantifier(true) {
159+
//this.raise("Nothing to repeat")
160+
//}
161+
if self.eat('{') {
162+
//this.raise("Lone quantifier brackets")
163+
}
164+
//self.on_disjunction_leave(start, self.index());
165+
}
166+
167+
/// Validate the next characters as a RegExp `Alternative` production.
168+
/// ```grammar
169+
/// Alternative[U, N]::
170+
/// ε
171+
/// Alternative[?U, ?N] Term[?U, ?N]
172+
/// ```
173+
fn consume_alternative(&mut self, i: u32) {
174+
let start = self.index();
175+
176+
//self.on_alternative_enter(start, i)
177+
//while self.code_point_with_offset(0).is_some() && self.consume_term() {
178+
// do nothing
179+
//}
180+
//self.on_alternative_leave(start, self.index(), i);
181+
}
182+
183+
fn count_capturing_parens(&mut self) -> u32 {
184+
let start = self.index();
185+
let mut in_class = false;
186+
let mut escaped = false;
187+
let mut count = 0;
188+
189+
while let Some(&cp) = self.code_point_with_offset(0) {
190+
if escaped {
191+
escaped = false;
192+
} else if cp == '\\' {
193+
escaped = true;
194+
} else if cp == '[' {
195+
in_class = true;
196+
} else if cp == ']' {
197+
in_class = false;
198+
} else if cp == '('
199+
&& !in_class
200+
&& (self.code_point_with_offset(1) != Some(&'?')
201+
|| (self.code_point_with_offset(2) == Some(&'<')
202+
&& self.code_point_with_offset(3) != Some(&'=')
203+
&& self.code_point_with_offset(3) != Some(&'!')))
204+
{
205+
count += 1
206+
}
207+
self.advance();
208+
}
209+
210+
self.rewind(start);
211+
count
132212
}
133213
}
134214

0 commit comments

Comments
 (0)