Skip to content

Commit 8bad6b4

Browse files
committed
syntax: support LookArounds in AST and HIR
1 parent 6c8a65d commit 8bad6b4

File tree

9 files changed

+354
-0
lines changed

9 files changed

+354
-0
lines changed

regex-syntax/src/ast/mod.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,9 @@ pub enum Ast {
477477
Dot(Box<Span>),
478478
/// A single zero-width assertion.
479479
Assertion(Box<Assertion>),
480+
#[cfg(feature = "look-behinds")]
481+
/// A single look-around regular expression.
482+
LookAround(Box<LookAround>),
480483
/// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
481484
ClassUnicode(Box<ClassUnicode>),
482485
/// A single perl character class, e.g., `\d` or `\W`.
@@ -521,6 +524,12 @@ impl Ast {
521524
Ast::Assertion(Box::new(e))
522525
}
523526

527+
/// Create a "look-around" AST item.
528+
#[cfg(feature = "look-behinds")]
529+
pub fn lookaround(e: LookAround) -> Ast {
530+
Ast::LookAround(Box::new(e))
531+
}
532+
524533
/// Create a "Unicode class" AST item.
525534
pub fn class_unicode(e: ClassUnicode) -> Ast {
526535
Ast::ClassUnicode(Box::new(e))
@@ -564,6 +573,8 @@ impl Ast {
564573
Ast::Literal(ref x) => &x.span,
565574
Ast::Dot(ref span) => span,
566575
Ast::Assertion(ref x) => &x.span,
576+
#[cfg(feature = "look-behinds")]
577+
Ast::LookAround(ref x) => &x.span,
567578
Ast::ClassUnicode(ref x) => &x.span,
568579
Ast::ClassPerl(ref x) => &x.span,
569580
Ast::ClassBracketed(ref x) => &x.span,
@@ -598,6 +609,8 @@ impl Ast {
598609
| Ast::Group(_)
599610
| Ast::Alternation(_)
600611
| Ast::Concat(_) => true,
612+
#[cfg(feature = "look-behinds")]
613+
Ast::LookAround(_) => true,
601614
}
602615
}
603616
}
@@ -1342,6 +1355,28 @@ pub enum AssertionKind {
13421355
WordBoundaryEndHalf,
13431356
}
13441357

1358+
/// A single zero-width look-around assertion, e.g., `(?<=...)` or `(?<!...)`.
1359+
#[derive(Clone, Debug, Eq, PartialEq)]
1360+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1361+
pub struct LookAround {
1362+
/// The span of this look-around.
1363+
pub span: Span,
1364+
/// The look-around kind, e.g., a positive or negative look-behind.
1365+
pub kind: LookAroundKind,
1366+
/// The regular expression inside the look-around.
1367+
pub ast: Box<Ast>,
1368+
}
1369+
1370+
/// The kind of a look-around. Only look-behind variants are defined here.
1371+
#[derive(Clone, Debug, Eq, PartialEq)]
1372+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1373+
pub enum LookAroundKind {
1374+
/// `(?<=...)`
1375+
PositiveLookBehind,
1376+
/// `(?<!...)`
1377+
NegativeLookBehind,
1378+
}
1379+
13451380
/// A repetition operation applied to a regular expression.
13461381
#[derive(Clone, Debug, Eq, PartialEq)]
13471382
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
@@ -1647,6 +1682,8 @@ impl Drop for Ast {
16471682
| Ast::ClassBracketed(_) => return,
16481683
Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
16491684
Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1685+
#[cfg(feature = "look-behinds")]
1686+
Ast::LookAround(ref x) if !x.ast.has_subexprs() => return,
16501687
Ast::Alternation(ref x) if x.asts.is_empty() => return,
16511688
Ast::Concat(ref x) if x.asts.is_empty() => return,
16521689
_ => {}
@@ -1673,6 +1710,10 @@ impl Drop for Ast {
16731710
Ast::Group(ref mut x) => {
16741711
stack.push(mem::replace(&mut x.ast, empty_ast()));
16751712
}
1713+
#[cfg(feature = "look-behinds")]
1714+
Ast::LookAround(ref mut x) => {
1715+
stack.push(mem::replace(&mut x.ast, empty_ast()));
1716+
}
16761717
Ast::Alternation(ref mut x) => {
16771718
stack.extend(x.asts.drain(..));
16781719
}

regex-syntax/src/ast/parse.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2328,6 +2328,8 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
23282328
Ast::ClassBracketed(ref x) => &x.span,
23292329
Ast::Repetition(ref x) => &x.span,
23302330
Ast::Group(ref x) => &x.span,
2331+
#[cfg(feature = "look-behinds")]
2332+
Ast::LookAround(ref x) => &x.span,
23312333
Ast::Alternation(ref x) => &x.span,
23322334
Ast::Concat(ref x) => &x.span,
23332335
};
@@ -2354,6 +2356,11 @@ impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
23542356
self.decrement_depth();
23552357
Ok(())
23562358
}
2359+
#[cfg(feature = "look-behinds")]
2360+
Ast::LookAround(_) => {
2361+
self.decrement_depth();
2362+
Ok(())
2363+
}
23572364
}
23582365
}
23592366

regex-syntax/src/ast/print.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ impl<W: fmt::Write> Visitor for Writer<W> {
8080
fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
8181
match *ast {
8282
Ast::Group(ref x) => self.fmt_group_pre(x),
83+
#[cfg(feature = "look-behinds")]
84+
Ast::LookAround(ref x) => self.fmt_lookaround_pre(x),
8385
Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x),
8486
_ => Ok(()),
8587
}
@@ -92,6 +94,8 @@ impl<W: fmt::Write> Visitor for Writer<W> {
9294
Ast::Literal(ref x) => self.fmt_literal(x),
9395
Ast::Dot(_) => self.wtr.write_str("."),
9496
Ast::Assertion(ref x) => self.fmt_assertion(x),
97+
#[cfg(feature = "look-behinds")]
98+
Ast::LookAround(ref x) => self.fmt_lookaround_post(x),
9599
Ast::ClassPerl(ref x) => self.fmt_class_perl(x),
96100
Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x),
97101
Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x),
@@ -174,6 +178,20 @@ impl<W: fmt::Write> Writer<W> {
174178
self.wtr.write_str(")")
175179
}
176180

181+
#[cfg(feature = "look-behinds")]
182+
fn fmt_lookaround_pre(&mut self, ast: &ast::LookAround) -> fmt::Result {
183+
use crate::ast::LookAroundKind::*;
184+
match ast.kind {
185+
PositiveLookBehind => self.wtr.write_str("(?<="),
186+
NegativeLookBehind => self.wtr.write_str("(?<!"),
187+
}
188+
}
189+
190+
#[cfg(feature = "look-behinds")]
191+
fn fmt_lookaround_post(&mut self, _ast: &ast::LookAround) -> fmt::Result {
192+
self.wtr.write_str(")")
193+
}
194+
177195
fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
178196
use crate::ast::RepetitionKind::*;
179197
match ast.op.kind {
@@ -511,6 +529,13 @@ mod tests {
511529
roundtrip("(a)");
512530
}
513531

532+
#[cfg(feature = "look-behinds")]
533+
#[test]
534+
fn print_lookaround() {
535+
roundtrip("(?<=a)");
536+
roundtrip("(?<!a)");
537+
}
538+
514539
#[test]
515540
fn print_class() {
516541
roundtrip(r"[abc]");

regex-syntax/src/ast/visitor.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ enum Frame<'a> {
140140
/// A stack frame allocated just before descending into a group's child
141141
/// node.
142142
Group(&'a ast::Group),
143+
/// A stack frame allocated just before descending into a look-around's
144+
/// child node.
145+
#[cfg(feature = "look-behinds")]
146+
LookAround(&'a ast::LookAround),
143147
/// The stack frame used while visiting every child node of a concatenation
144148
/// of expressions.
145149
Concat {
@@ -270,6 +274,8 @@ impl<'a> HeapVisitor<'a> {
270274
}
271275
Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
272276
Ast::Group(ref x) => Some(Frame::Group(x)),
277+
#[cfg(feature = "look-behinds")]
278+
Ast::LookAround(ref x) => Some(Frame::LookAround(x)),
273279
Ast::Concat(ref x) if x.asts.is_empty() => None,
274280
Ast::Concat(ref x) => {
275281
Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
@@ -289,6 +295,8 @@ impl<'a> HeapVisitor<'a> {
289295
match induct {
290296
Frame::Repetition(_) => None,
291297
Frame::Group(_) => None,
298+
#[cfg(feature = "look-behinds")]
299+
Frame::LookAround(_) => None,
292300
Frame::Concat { tail, .. } => {
293301
if tail.is_empty() {
294302
None
@@ -444,6 +452,8 @@ impl<'a> Frame<'a> {
444452
match *self {
445453
Frame::Repetition(rep) => &rep.ast,
446454
Frame::Group(group) => &group.ast,
455+
#[cfg(feature = "look-behinds")]
456+
Frame::LookAround(look) => &look.ast,
447457
Frame::Concat { head, .. } => head,
448458
Frame::Alternation { head, .. } => head,
449459
}

regex-syntax/src/hir/literal.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,8 @@ impl Extractor {
173173

174174
match *hir.kind() {
175175
Empty | Look(_) => Seq::singleton(self::Literal::exact(vec![])),
176+
#[cfg(feature = "look-behinds")]
177+
LookAround(_) => Seq::singleton(self::Literal::exact(vec![])),
176178
Literal(hir::Literal(ref bytes)) => {
177179
let mut seq =
178180
Seq::singleton(self::Literal::exact(bytes.to_vec()));
@@ -2453,6 +2455,22 @@ mod tests {
24532455
assert_eq!(expected, e(r"^aZ*b"));
24542456
}
24552457

2458+
#[test]
2459+
#[cfg(feature = "look-behinds")]
2460+
fn lookaround() {
2461+
assert_eq!(exact([E("ab")]), e(r"a(?<=qwa)b"));
2462+
assert_eq!(exact([E("ab")]), e(r"a(?<!qw1)b"));
2463+
2464+
assert_eq!(exact([E("ab")]), e(r"(?<=qwe)ab"));
2465+
assert_eq!(exact([E("ab")]), e(r"(?<!qwe)ab"));
2466+
2467+
assert_eq!(exact([E("ab")]), e(r"ab(?<=qab)"));
2468+
assert_eq!(exact([E("ab")]), e(r"ab(?<!qwe)"));
2469+
2470+
let expected = (seq([I("aZ"), E("ab")]), seq([I("Zb"), E("ab")]));
2471+
assert_eq!(expected, e(r"(?<=foo)aZ*b"));
2472+
}
2473+
24562474
#[test]
24572475
fn repetition() {
24582476
assert_eq!(exact(["a", ""]), e(r"a?"));

0 commit comments

Comments
 (0)