Skip to content

Commit 67d97e5

Browse files
committed
Diagnose backreferences to the whole pattern
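Only subpattern references support whole-pattern recursion, so a backreference to the whole pattern (e.g. `\g{0}`) is now diagnosed with `cannotReferToWholePattern`. In addition, plumb the `eatEnding` parameter through the named and numbered reference parsing helpers.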
1 parent e4ccd09 commit 67d97e5

File tree

3 files changed

+26
-9
lines changed

3 files changed

+26
-9
lines changed

Sources/_MatchingEngine/Regex/Parse/Diagnostics.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ enum ParseError: Error, Hashable {
3333
case expectedNonEmptyContents
3434
case expectedEscape
3535

36+
case cannotReferToWholePattern
37+
3638
case unknownGroupKind(String)
3739

3840
case invalidMatchingOption(Character)
@@ -73,6 +75,8 @@ extension ParseError: CustomStringConvertible {
7375
return "expected non-empty contents"
7476
case .expectedEscape:
7577
return "expected escape sequence"
78+
case .cannotReferToWholePattern:
79+
return "cannot refer to whole pattern here"
7680
case let .unknownGroupKind(str):
7781
return "unknown group kind '(\(str)'"
7882
case let .invalidMatchingOption(c):

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,7 @@ extension Source {
854854
/// NumberRef -> ('+' | '-')? <Decimal Number>
855855
///
856856
private mutating func lexNumberedReference(
857+
allowWholePatternRef: Bool = false
857858
) throws -> AST.Atom.Reference? {
858859
let kind = try recordLoc { src -> AST.Atom.Reference.Kind? in
859860
// Note this logic should match canLexNumberedReference.
@@ -869,6 +870,9 @@ extension Source {
869870
return nil
870871
}
871872
guard let kind = kind else { return nil }
873+
guard allowWholePatternRef || kind.value != .recurseWholePattern else {
874+
throw ParseError.cannotReferToWholePattern
875+
}
872876
return .init(kind.value, innerLoc: kind.location)
873877
}
874878

@@ -882,10 +886,10 @@ extension Source {
882886

883887
/// Eat a named reference up to a given closing delimiter.
884888
private mutating func expectNamedReference(
885-
endingWith end: String
889+
endingWith end: String, eatEnding: Bool = true
886890
) throws -> AST.Atom.Reference {
887891
// TODO: Group name validation, see comment in lexGroupStart.
888-
let str = try expectQuoted(endingWith: end)
892+
let str = try expectQuoted(endingWith: end, eatEnding: eatEnding)
889893
return .init(.named(str.value), innerLoc: str.location)
890894
}
891895

@@ -894,13 +898,18 @@ extension Source {
894898
/// NameOrNumberRef -> NumberRef | <String>
895899
///
896900
private mutating func expectNamedOrNumberedReference(
897-
endingWith ending: String
901+
endingWith ending: String, eatEnding: Bool = true,
902+
allowWholePatternRef: Bool = false
898903
) throws -> AST.Atom.Reference {
899-
if let numbered = try lexNumberedReference() {
900-
try expect(sequence: ending)
904+
if let numbered = try lexNumberedReference(
905+
allowWholePatternRef: allowWholePatternRef
906+
) {
907+
if eatEnding {
908+
try expect(sequence: ending)
909+
}
901910
return numbered
902911
}
903-
return try expectNamedReference(endingWith: ending)
912+
return try expectNamedReference(endingWith: ending, eatEnding: eatEnding)
904913
}
905914

906915
private static func getClosingDelimiter(
@@ -943,8 +952,8 @@ extension Source {
943952
// Oniguruma-style subpatterns.
944953
if let openChar = src.tryEat(anyOf: "<", "'") {
945954
let closing = String(Source.getClosingDelimiter(for: openChar))
946-
return .subpattern(
947-
try src.expectNamedOrNumberedReference(endingWith: closing))
955+
return .subpattern(try src.expectNamedOrNumberedReference(
956+
endingWith: closing, allowWholePatternRef: true))
948957
}
949958

950959
// PCRE allows \g followed by a bare numeric reference.
@@ -1029,7 +1038,7 @@ extension Source {
10291038
}
10301039

10311040
// Numbered subpattern reference.
1032-
if let ref = try src.lexNumberedReference() {
1041+
if let ref = try src.lexNumberedReference(allowWholePatternRef: true) {
10331042
try src.expect(")")
10341043
return .subpattern(ref)
10351044
}

Tests/RegexTests/ParseTests.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,5 +1119,9 @@ extension RegexTests {
11191119
diagnosticTest(#"\k''"#, .expectedNonEmptyContents)
11201120
diagnosticTest(#"(?&)"#, .expectedNonEmptyContents)
11211121
diagnosticTest(#"(?P>)"#, .expectedNonEmptyContents)
1122+
1123+
// MARK: References
1124+
1125+
diagnosticTest(#"\g{0}"#, .cannotReferToWholePattern)
11221126
}
11231127
}

0 commit comments

Comments
 (0)