Skip to content

Commit 13b361c

Browse files
committed
Introduce Source.lookahead
Use this to replace the various places we're doing `var src = self`.
1 parent 3c1c93a commit 13b361c

File tree

1 file changed

+49
-37
lines changed

1 file changed

+49
-37
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 49 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,14 @@ extension Source {
149149
return result
150150
}
151151

152+
/// Perform a lookahead using a temporary source. Within the body of the
153+
/// lookahead, any modifications to the source will not be reflected outside
154+
/// the body.
155+
func lookahead<T>(_ body: (inout Source) throws -> T) rethrows -> T {
156+
var src = self
157+
return try body(&src)
158+
}
159+
152160
/// Attempt to eat the given character, returning its source location if
153161
/// successful, `nil` otherwise.
154162
mutating func tryEatWithLoc(_ c: Character) -> SourceLocation? {
@@ -1240,8 +1248,9 @@ extension Source {
12401248

12411249
private func canLexPOSIXCharacterProperty() -> Bool {
12421250
do {
1243-
var src = self
1244-
return try src.lexPOSIXCharacterProperty() != nil
1251+
return try lookahead { src in
1252+
try src.lexPOSIXCharacterProperty() != nil
1253+
}
12451254
} catch {
12461255
// We want to err on the side of lexing a POSIX character property, so
12471256
// even if it is invalid in some way (e.g. invalid property names), still
@@ -1394,10 +1403,11 @@ extension Source {
13941403

13951404
/// Checks whether a numbered reference can be lexed.
13961405
private func canLexNumberedReference() -> Bool {
1397-
var src = self
1398-
_ = src.tryEat(anyOf: "+", "-")
1399-
guard let next = src.peek() else { return false }
1400-
return RadixKind.decimal.characterFilter(next)
1406+
lookahead { src in
1407+
_ = src.tryEat(anyOf: "+", "-")
1408+
guard let next = src.peek() else { return false }
1409+
return RadixKind.decimal.characterFilter(next)
1410+
}
14011411
}
14021412

14031413
/// Eat a named reference up to a given closing delimiter.
@@ -1587,53 +1597,55 @@ extension Source {
15871597

15881598
/// Whether we can lex a group-like reference after the specifier '(?'.
15891599
private func canLexGroupLikeReference() -> Bool {
1590-
var src = self
1591-
if src.tryEat("P") {
1592-
return src.tryEat(anyOf: "=", ">") != nil
1593-
}
1594-
if src.tryEat(anyOf: "&", "R") != nil {
1595-
return true
1600+
lookahead { src in
1601+
if src.tryEat("P") {
1602+
return src.tryEat(anyOf: "=", ">") != nil
1603+
}
1604+
if src.tryEat(anyOf: "&", "R") != nil {
1605+
return true
1606+
}
1607+
return src.canLexNumberedReference()
15961608
}
1597-
return src.canLexNumberedReference()
15981609
}
15991610

16001611
private func canLexMatchingOptionsAsAtom(context: ParsingContext) -> Bool {
1601-
var src = self
1602-
1603-
// See if we can lex a matching option sequence that terminates in ')'. Such
1604-
// a sequence is an atom. If an error is thrown, there are invalid elements
1605-
// of the matching option sequence. In such a case, we can lex as a group
1606-
// and diagnose the invalid group kind.
1607-
guard (try? src.lexMatchingOptionSequence(context: context)) != nil else {
1608-
return false
1612+
lookahead { src in
1613+
// See if we can lex a matching option sequence that terminates in ')'.
1614+
// Such a sequence is an atom. If an error is thrown, there are invalid
1615+
// elements of the matching option sequence. In such a case, we can lex as
1616+
// a group and diagnose the invalid group kind.
1617+
guard (try? src.lexMatchingOptionSequence(context: context)) != nil else {
1618+
return false
1619+
}
1620+
return src.tryEat(")")
16091621
}
1610-
return src.tryEat(")")
16111622
}
16121623

16131624
/// Whether a group specifier should be lexed as an atom instead of a group.
16141625
private func shouldLexGroupLikeAtom(context: ParsingContext) -> Bool {
1615-
var src = self
1616-
guard src.tryEat("(") else { return false }
1626+
lookahead { src in
1627+
guard src.tryEat("(") else { return false }
16171628

1618-
if src.tryEat("?") {
1619-
// The start of a reference '(?P=', '(?R', ...
1620-
if src.canLexGroupLikeReference() { return true }
1629+
if src.tryEat("?") {
1630+
// The start of a reference '(?P=', '(?R', ...
1631+
if src.canLexGroupLikeReference() { return true }
16211632

1622-
// The start of a PCRE callout.
1623-
if src.tryEat("C") { return true }
1633+
// The start of a PCRE callout.
1634+
if src.tryEat("C") { return true }
16241635

1625-
// The start of an Oniguruma 'of-contents' callout.
1626-
if src.tryEat("{") { return true }
1636+
// The start of an Oniguruma 'of-contents' callout.
1637+
if src.tryEat("{") { return true }
16271638

1628-
// A matching option atom (?x), (?i), ...
1629-
if src.canLexMatchingOptionsAsAtom(context: context) { return true }
1639+
// A matching option atom (?x), (?i), ...
1640+
if src.canLexMatchingOptionsAsAtom(context: context) { return true }
1641+
1642+
return false
1643+
}
1644+
// The start of a backreference directive or Oniguruma named callout.
1645+
if src.tryEat("*") { return true }
16301646

16311647
return false
16321648
}
1633-
// The start of a backreference directive or Oniguruma named callout.
1634-
if src.tryEat("*") { return true }
1635-
1636-
return false
16371649
}
16381650

16391651
/// Consume an escaped atom, starting from after the backslash

0 commit comments

Comments
 (0)