Skip to content

Commit 401a63a

Browse files
authored
Merge pull request #120 from rxwei/capture-type-nil
Fix two crashers with quantification on transformed capture.
2 parents fdce624 + 4f14de3 commit 401a63a

File tree

6 files changed

+135
-31
lines changed

6 files changed

+135
-31
lines changed

Sources/_MatchingEngine/Regex/AST/AST.swift

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,13 @@ extension AST {
9494

9595
/// Whether this node is a capturing group or has one nested anywhere inside it
9696
public var hasCapture: Bool {
97-
if case let .group(g) = self, g.kind.value.isCapturing {
97+
switch self {
98+
case .group(let g) where g.kind.value.isCapturing,
99+
.groupTransform(let g, _) where g.kind.value.isCapturing:
98100
return true
101+
default:
102+
break
99103
}
100-
101104
return self.children?.any(\.hasCapture) ?? false
102105
}
103106
}
@@ -207,14 +210,18 @@ extension AST {
207210

208211
// FIXME: Get this out of here
209212
public struct CaptureTransform: Equatable, Hashable, CustomStringConvertible {
213+
public let resultType: Any.Type
210214
public let closure: (Substring) -> Any
211215

212-
public init(_ closure: @escaping (Substring) -> Any) {
216+
public init(resultType: Any.Type, _ closure: @escaping (Substring) -> Any) {
217+
self.resultType = resultType
213218
self.closure = closure
214219
}
215220

216221
public func callAsFunction(_ input: Substring) -> Any {
217-
closure(input)
222+
let result = closure(input)
223+
assert(type(of: result) == resultType)
224+
return result
218225
}
219226

220227
public static func == (lhs: CaptureTransform, rhs: CaptureTransform) -> Bool {
@@ -229,7 +236,6 @@ public struct CaptureTransform: Equatable, Hashable, CustomStringConvertible {
229236
}
230237

231238
public var description: String {
232-
"<transform>"
239+
"<transform result_type=\(resultType)>"
233240
}
234241
}
235-

Sources/_MatchingEngine/Regex/Parse/CaptureStructure.swift

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
// A tree representing the type of some captures.
1313
public enum CaptureStructure: Equatable {
14-
case atom(name: String? = nil)
14+
case atom(name: String? = nil, type: AnyType? = nil)
1515
indirect case array(CaptureStructure)
1616
indirect case optional(CaptureStructure)
1717
indirect case tuple([CaptureStructure])
@@ -47,6 +47,17 @@ extension AST {
4747
default:
4848
return innerCaptures
4949
}
50+
case .groupTransform(let group, let transform):
51+
let innerCaptures = group.child.captureStructure
52+
switch group.kind.value {
53+
case .capture:
54+
return .atom(type: AnyType(transform.resultType)) + innerCaptures
55+
case .namedCapture(let name):
56+
return .atom(name: name.value, type: AnyType(transform.resultType))
57+
+ innerCaptures
58+
default:
59+
return innerCaptures
60+
}
5061
case .conditional(let c):
5162
// A conditional's capture structure is effectively that of an alternation
5263
// between the true and false branches. However the condition may also
@@ -67,8 +78,6 @@ extension AST {
6778
quantification.amount.value == .zeroOrOne
6879
? CaptureStructure.optional
6980
: CaptureStructure.array)
70-
case .groupTransform:
71-
fatalError("Unreachable. Case will be removed later.")
7281
case .quote, .trivia, .atom, .customCharacterClass, .empty:
7382
return .empty
7483
}
@@ -135,8 +144,10 @@ extension CaptureStructure {
135144

136145
public func type(withAtomType atomType: Any.Type) -> Any.Type {
137146
switch self {
138-
case .atom:
147+
case .atom(_, type: nil):
139148
return atomType
149+
case .atom(_, type: let type?):
150+
return type.base
140151
case .array(let child):
141152
return TypeConstruction.arrayType(of: child.type(withAtomType: atomType))
142153
case .optional(let child):
@@ -213,16 +224,18 @@ extension CaptureStructure {
213224
func encode(_ node: CaptureStructure, isTopLevel: Bool = false) {
214225
switch node {
215226
// 〚`T` (atom)〛 ==> .atom
216-
case .atom(name: nil):
227+
case .atom(name: nil, type: nil):
217228
append(.atom)
218229
// 〚`name: T` (atom)〛 ==> .atom, `name`, '\0'
219-
case .atom(name: let name?):
230+
case .atom(name: let name?, type: nil):
220231
append(.namedAtom)
221232
let nameCString = name.utf8CString
222233
let nameSlot = UnsafeMutableRawBufferPointer(
223234
rebasing: buffer[offset ..< offset+nameCString.count])
224235
nameCString.withUnsafeBytes(nameSlot.copyMemory(from:))
225236
offset += nameCString.count
237+
case .atom(_, _?):
238+
fatalError("Cannot encode a capture structure with explicit types")
226239
// 〚`[T]`〛 ==> 〚`T`〛, .formArray
227240
case .array(let child):
228241
encode(child)

Sources/_MatchingEngine/Utility/Misc.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,3 +149,19 @@ extension BinaryInteger {
149149
}
150150
}
151151

152+
/// A wrapper of an existential metatype, equatable and hashable by the identity of the wrapped type.
153+
public struct AnyType: Equatable, Hashable {
154+
public var base: Any.Type
155+
156+
public init(_ type: Any.Type) {
157+
base = type
158+
}
159+
160+
public static func == (lhs: AnyType, rhs: AnyType) -> Bool {
161+
lhs.base == rhs.base
162+
}
163+
164+
public func hash(into hasher: inout Hasher) {
165+
hasher.combine(ObjectIdentifier(base))
166+
}
167+
}

Sources/_StringProcessing/Capture.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ extension Capture {
5151
}
5252
return _openExistential(childType.base, do: helper)
5353
case .some(let subcapture):
54-
return subcapture.value
54+
func helper<T>(_ value: T) -> Any {
55+
Optional(value) as Any
56+
}
57+
return _openExistential(subcapture.value, do: helper)
5558
case .none(let childType):
5659
func helper<T>(_: T.Type) -> Any {
5760
nil as T? as Any

Sources/_StringProcessing/RegexDSL/DSL.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ public struct CapturingGroup<Match: MatchProtocol>: RegexProtocol {
179179
self.regex = .init(ast:
180180
.groupTransform(
181181
.init(.init(faking: .capture), component.regex.ast, .fake),
182-
transform: CaptureTransform {
182+
transform: CaptureTransform(resultType: NewCapture.self) {
183183
transform($0) as Any
184184
}))
185185
}

Tests/RegexTests/RegexDSLTests.swift

Lines changed: 83 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,55 @@ class RegexDSLTests: XCTestCase {
8585
== Tuple3("b", "cccc", ["d", "d", "d"]))
8686
}
8787

88+
func testQuantificationWithTransformedCapture() throws {
89+
// This test is to make sure transformed capture type information is
90+
// correctly propagated from the DSL into the bytecode and that the engine
91+
// is reconstructing the right types upon quantification (both empty and
92+
// non-empty).
93+
enum Word: Int32 {
94+
case apple
95+
case orange
96+
97+
init?(_ string: Substring) {
98+
switch string {
99+
case "apple": self = .apple
100+
case "orange": self = .orange
101+
default: return nil
102+
}
103+
}
104+
}
105+
let regex = Regex {
106+
"a".+
107+
OneOrMore(.whitespace)
108+
Optionally {
109+
OneOrMore(.digit).capture { Int($0)! }
110+
}
111+
Repeat {
112+
OneOrMore(.whitespace)
113+
OneOrMore(.word).capture { Word($0)! }
114+
}
115+
}
116+
// Assert the inferred capture type.
117+
let _: Tuple3<Substring, Int?, [Word]>.Type
118+
= type(of: regex).Match.self
119+
do {
120+
let input = "aaa 123 apple orange apple"
121+
let match = input.match(regex)?.match.tuple
122+
let (whole, number, words) = try XCTUnwrap(match)
123+
XCTAssertTrue(whole == input)
124+
XCTAssertEqual(number, 123)
125+
XCTAssertEqual(words, [.apple, .orange, .apple])
126+
}
127+
do {
128+
let input = "aaa "
129+
let match = input.match(regex)?.match.tuple
130+
let (whole, number, words) = try XCTUnwrap(match)
131+
XCTAssertTrue(whole == input)
132+
XCTAssertEqual(number, nil)
133+
XCTAssertTrue(words.isEmpty)
134+
}
135+
}
136+
88137
// Note: Types of nested captures should be flat, but are currently nested
89138
// due to the lack of variadic generics. Without it, we cannot effectively
90139
// express type constraints to concatenate splatted tuples.
@@ -179,39 +228,46 @@ class RegexDSLTests: XCTestCase {
179228
let line = """
180229
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
181230
"""
182-
let regex = Regex {
183-
OneOrMore(CharacterClass.hexDigit).capture()
231+
232+
let regexWithCapture = Regex {
233+
OneOrMore(CharacterClass.hexDigit).capture(Unicode.Scalar.init(hex:))
184234
Optionally {
185235
".."
186-
OneOrMore(CharacterClass.hexDigit).capture()
236+
OneOrMore(CharacterClass.hexDigit).capture(Unicode.Scalar.init(hex:))
187237
}
188238
OneOrMore(CharacterClass.whitespace)
189239
";"
190240
OneOrMore(CharacterClass.whitespace)
191241
OneOrMore(CharacterClass.word).capture()
192242
Repeat(CharacterClass.any)
243+
} // Regex<(Substring, Unicode.Scalar?, Unicode.Scalar??, Substring)>
244+
do {
245+
// Assert the inferred capture type.
246+
typealias ExpectedMatch = Tuple4<
247+
Substring, Unicode.Scalar?, Unicode.Scalar??, Substring
248+
>
249+
let _: ExpectedMatch.Type = type(of: regexWithCapture).Match.self
250+
let maybeMatchResult = line.match(regexWithCapture)
251+
let matchResult = try XCTUnwrap(maybeMatchResult)
252+
let (wholeMatch, lower, upper, propertyString) = matchResult.match.tuple
253+
XCTAssertEqual(wholeMatch, Substring(line))
254+
XCTAssertEqual(lower, Unicode.Scalar(0xA6F0))
255+
XCTAssertEqual(upper, Unicode.Scalar(0xA6F1))
256+
XCTAssertEqual(propertyString, "Extend")
193257
}
194-
// Assert the inferred capture type.
195-
typealias ExpectedMatch = Tuple4<
196-
Substring, Substring, Substring?, Substring
197-
>
198-
let _: ExpectedMatch.Type = type(of: regex).Match.self
199-
func run<R: RegexProtocol>(
200-
_ regex: R
201-
) throws where R.Match == ExpectedMatch {
202-
let maybeMatchResult = line.match(regex)
258+
259+
do {
260+
let regexLiteral = try MockRegexLiteral(
261+
#"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#,
262+
matching: Tuple4<Substring, Substring, Substring?, Substring>.self)
263+
let maybeMatchResult = line.match(regexLiteral)
203264
let matchResult = try XCTUnwrap(maybeMatchResult)
204265
let (wholeMatch, lower, upper, propertyString) = matchResult.match.tuple
205266
XCTAssertEqual(wholeMatch, Substring(line))
206267
XCTAssertEqual(lower, "A6F0")
207268
XCTAssertEqual(upper, "A6F1")
208269
XCTAssertEqual(propertyString, "Extend")
209270
}
210-
let regexLiteral = try MockRegexLiteral(
211-
#"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#,
212-
matching: Tuple4<Substring, Substring, Substring?, Substring>.self)
213-
try run(regex)
214-
try run(regexLiteral)
215271
}
216272

217273
func testDynamicCaptures() throws {
@@ -238,3 +294,13 @@ class RegexDSLTests: XCTestCase {
238294
}
239295
}
240296
}
297+
298+
extension Unicode.Scalar {
299+
// Converts a hexadecimal string to a scalar; fails if the string is not valid hex or not a valid scalar value
300+
public init?<S: StringProtocol>(hex: S) {
301+
guard let val = UInt32(hex, radix: 16), let scalar = Self(val) else {
302+
return nil
303+
}
304+
self = scalar
305+
}
306+
}

0 commit comments

Comments
 (0)