Skip to content

Commit 83c94bf

Browse files
authored
Merge pull request #113 from hamishknight/conditions-apply
2 parents 1f37d34 + a3b5a71 commit 83c94bf

18 files changed

+799
-212
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ public indirect enum AST:
2323
/// (...)
2424
case group(Group)
2525

26+
/// (?(cond) true-branch | false-branch)
27+
case conditional(Conditional)
28+
2629
case quantification(Quantification)
2730

2831
/// \Q...\E
@@ -55,6 +58,7 @@ extension AST {
5558
case let .alternation(v): return v
5659
case let .concatenation(v): return v
5760
case let .group(v): return v
61+
case let .conditional(v): return v
5862
case let .quantification(v): return v
5963
case let .quote(v): return v
6064
case let .trivia(v): return v
@@ -163,6 +167,42 @@ extension AST {
163167
self.location = location
164168
}
165169
}
170+
171+
public struct Reference: Hashable {
172+
@frozen
173+
public enum Kind: Hashable {
174+
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
175+
// Oniguruma: \k<n>, \k'n'
176+
case absolute(Int)
177+
178+
// \g{-n} \g<+n> \g'+n' \g<-n> \g'-n' (?+n) (?-n)
179+
// (?(+n)... (?(-n)...
180+
// Oniguruma: \k<-n> \k<+n> \k'-n' \k'+n'
181+
case relative(Int)
182+
183+
// \k<name> \k'name' \g{name} \k{name} (?P=name)
184+
// \g<name> \g'name' (?&name) (?P>name)
185+
// (?(<name>)... (?('name')... (?(name)...
186+
case named(String)
187+
188+
/// (?R), (?(R)..., which are equivalent to (?0), (?(0)...
189+
static var recurseWholePattern: Kind { .absolute(0) }
190+
}
191+
public var kind: Kind
192+
193+
/// The location of the inner numeric or textual reference, e.g. the location
194+
/// of '-2' in '\g{-2}'.
195+
public var innerLoc: SourceLocation
196+
197+
public init(_ kind: Kind, innerLoc: SourceLocation) {
198+
self.kind = kind
199+
self.innerLoc = innerLoc
200+
}
201+
202+
/// Whether this is a reference that recurses the whole pattern, rather than
203+
/// a group.
204+
public var recursesWholePattern: Bool { kind == .recurseWholePattern }
205+
}
166206
}
167207

168208
// FIXME: Get this out of here

Sources/_MatchingEngine/Regex/AST/Atom.swift

Lines changed: 4 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ extension AST {
6666
// References
6767
case backreference(Reference)
6868
case subpattern(Reference)
69-
case condition(Reference)
7069
}
7170
}
7271
}
@@ -382,44 +381,6 @@ extension AST.Atom.CharacterProperty {
382381
}
383382
}
384383

385-
extension AST.Atom {
386-
public struct Reference: Hashable {
387-
@frozen
388-
public enum Kind: Hashable {
389-
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
390-
// Oniguruma: \k<n>, \k'n'
391-
case absolute(Int)
392-
393-
// \g{-n} \g<+n> \g'+n' \g<-n> \g'-n' (?+n) (?-n)
394-
// (?(+n)... (?(-n)...
395-
// Oniguruma: \k<-n> \k<+n> \k'-n' \k'+n'
396-
case relative(Int)
397-
398-
// \k<name> \k'name' \g{name} \k{name} (?P=name)
399-
// \g<name> \g'name' (?&name) (?P>name)
400-
// (?(<name>)... (?('name')... (?(name)...
401-
case named(String)
402-
403-
/// (?R), (?(R)..., which are equivalent to (?0), (?(0)...
404-
static var recurseWholePattern: Kind { .absolute(0) }
405-
}
406-
public var kind: Kind
407-
408-
/// The location of the inner numeric or textual reference, e.g the location
409-
/// of '-2' in '\g{-2}'.
410-
public var innerLoc: SourceLocation
411-
412-
public init(_ kind: Kind, innerLoc: SourceLocation) {
413-
self.kind = kind
414-
self.innerLoc = innerLoc
415-
}
416-
417-
/// Whether this is a reference that recurses the whole pattern, rather than
418-
/// a group.
419-
public var recursesWholePattern: Bool { kind == .recurseWholePattern }
420-
}
421-
}
422-
423384
extension AST.Atom {
424385
/// Anchors and other built-in zero-width assertions
425386
@frozen
@@ -497,7 +458,7 @@ extension AST.Atom {
497458
fallthrough
498459

499460
case .property, .escaped, .any, .startOfLine, .endOfLine,
500-
.backreference, .subpattern, .condition, .namedCharacter:
461+
.backreference, .subpattern, .namedCharacter:
501462
return nil
502463
}
503464
}
@@ -522,7 +483,7 @@ extension AST.Atom {
522483
return "\\M-\\C-\(x)"
523484

524485
case .property, .escaped, .any, .startOfLine, .endOfLine,
525-
.backreference, .subpattern, .condition, .namedCharacter:
486+
.backreference, .subpattern, .namedCharacter:
526487
return nil
527488
}
528489
}
@@ -534,8 +495,8 @@ extension AST {
534495
case .atom(let a): return a.literalStringValue
535496

536497
case .alternation, .concatenation, .group,
537-
.quantification, .quote, .trivia,
538-
.customCharacterClass, .empty,
498+
.conditional, .quantification, .quote,
499+
.trivia, .customCharacterClass, .empty,
539500
.groupTransform:
540501
return nil
541502
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
//
10+
//===----------------------------------------------------------------------===//
11+
12+
extension AST {
13+
public struct Conditional: Hashable, _ASTNode {
14+
public var location: SourceLocation
15+
public var condition: Condition
16+
17+
public var trueBranch: AST
18+
public var pipe: SourceLocation?
19+
public var falseBranch: AST
20+
21+
public init(
22+
_ condition: Condition, trueBranch: AST, pipe: SourceLocation?,
23+
falseBranch: AST, _ location: SourceLocation
24+
) {
25+
self.location = location
26+
self.condition = condition
27+
self.trueBranch = trueBranch
28+
self.pipe = pipe
29+
self.falseBranch = falseBranch
30+
}
31+
}
32+
}
33+
34+
extension AST.Conditional {
35+
public struct Condition: Hashable {
36+
public enum Kind: Hashable {
37+
/// Check to see if a certain group was matched.
38+
case groupMatched(AST.Reference)
39+
40+
// Check for recursion.
41+
case recursionCheck
42+
case groupRecursionCheck(AST.Reference)
43+
44+
/// Define a new group that can be referenced elsewhere.
45+
case defineGroup
46+
47+
/// A PCRE version check.
48+
case pcreVersionCheck(PCREVersionCheck)
49+
50+
/// A group condition, which checks to see if an arbitrary bit of regex
51+
/// matches. Note that the semantics of this differ by engine: .NET only
52+
/// treats it as a lookahead, whereas Oniguruma can evaluate separately
53+
/// from the body of the conditional.
54+
case group(AST.Group)
55+
}
56+
57+
public var kind: Kind
58+
public var location: SourceLocation
59+
60+
public init(_ kind: Kind, _ location: SourceLocation) {
61+
self.kind = kind
62+
self.location = location
63+
}
64+
}
65+
}
66+
67+
extension AST.Conditional.Condition {
68+
public struct PCREVersionNumber: Hashable {
69+
public var major: Int
70+
public var minor: Int
71+
public var location: SourceLocation
72+
73+
public init(major: Int, minor: Int, _ location: SourceLocation) {
74+
self.major = major
75+
self.minor = minor
76+
self.location = location
77+
}
78+
}
79+
public struct PCREVersionCheck: Hashable {
80+
public enum Kind: Hashable {
81+
case equal, greaterThanOrEqual
82+
}
83+
public var kind: AST.Located<Kind>
84+
public var num: PCREVersionNumber
85+
86+
public init(_ kind: AST.Located<Kind>, _ num: PCREVersionNumber) {
87+
self.kind = kind
88+
self.num = num
89+
}
90+
}
91+
}

Sources/_MatchingEngine/Regex/AST/Group.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ extension AST.Group.Kind {
9898
return false
9999
}
100100
}
101+
102+
/// If this is a named group, its name, `nil` otherwise.
103+
public var name: String? {
104+
switch self {
105+
case .namedCapture(let name): return name.value
106+
default: return nil
107+
}
108+
}
101109
}
102110

103111
extension AST.Group {

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,21 @@ extension AST {
4747
default:
4848
return innerCaptures
4949
}
50+
case .conditional(let c):
51+
// A conditional's capture structure is effectively that of an alternation
52+
// between the true and false branches. However, the condition may also
53+
// have captures in the case of a group condition.
54+
var captures = CaptureStructure.empty
55+
switch c.condition.kind {
56+
case .group(let g):
57+
captures = captures + AST.group(g).captureStructure
58+
default:
59+
break
60+
}
61+
let branchCaptures = c.trueBranch.captureStructure +
62+
c.falseBranch.captureStructure
63+
return captures + branchCaptures.map(CaptureStructure.optional)
64+
5065
case .quantification(let quantification):
5166
return quantification.child.captureStructure.map(
5267
quantification.amount.value == .zeroOrOne

Sources/_MatchingEngine/Regex/Parse/Diagnostics.swift

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,15 @@ enum ParseError: Error, Hashable {
2828
// Something happened, fall-back for now
2929
case misc(String)
3030

31+
case tooManyBranchesInConditional(Int)
32+
case unsupportedCondition(String)
33+
3134
case expectedASCII(Character)
3235

3336
case expectedNonEmptyContents
37+
case expectedEscape
38+
39+
case cannotReferToWholePattern
3440

3541
case unknownGroupKind(String)
3642

@@ -70,6 +76,14 @@ extension ParseError: CustomStringConvertible {
7076
return s
7177
case .expectedNonEmptyContents:
7278
return "expected non-empty contents"
79+
case .expectedEscape:
80+
return "expected escape sequence"
81+
case .cannotReferToWholePattern:
82+
return "cannot refer to whole pattern here"
83+
case let .tooManyBranchesInConditional(i):
84+
return "expected 2 branches in conditional, have \(i)"
85+
case let .unsupportedCondition(str):
86+
return "\(str) cannot be used as condition"
7387
case let .unknownGroupKind(str):
7488
return "unknown group kind '(\(str)'"
7589
case let .invalidMatchingOption(c):

0 commit comments

Comments
 (0)