Skip to content

Commit bdeec8e

Browse files
committed
Track source location for character class range character
1 parent 227c032 commit bdeec8e

File tree

10 files changed

+75
-29
lines changed

10 files changed

+75
-29
lines changed

Sources/_MatchingEngine/Regex/AST/CustomCharClass.swift

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,25 @@ extension AST {
2121
case custom(CustomCharacterClass)
2222

2323
/// A character range `a-z`
24-
case range(Atom, Atom)
24+
case range(Range)
2525

2626
/// A single character or escape
2727
case atom(Atom)
2828

2929
/// A binary operator applied to sets of members `abc&&def`
3030
case setOperation([Member], Located<SetOp>, [Member])
3131
}
32+
public struct Range: Hashable {
33+
public var lhs: Atom
34+
public var dashLoc: SourceLocation
35+
public var rhs: Atom
36+
37+
public init(_ lhs: Atom, _ dashLoc: SourceLocation, _ rhs: Atom) {
38+
self.lhs = lhs
39+
self.dashLoc = dashLoc
40+
self.rhs = rhs
41+
}
42+
}
3243
public enum SetOp: String, Hashable {
3344
case subtraction = "--"
3445
case intersection = "&&"
@@ -45,3 +56,18 @@ extension AST {
4556
extension AST.CustomCharacterClass {
4657
public var isInverted: Bool { start.value == .inverted }
4758
}
59+
60+
extension CustomCC.Member {
61+
private var _associatedValue: Any {
62+
switch self {
63+
case .custom(let c): return c
64+
case .range(let r): return r
65+
case .atom(let a): return a
66+
case .setOperation(let lhs, let op, let rhs): return (lhs, op, rhs)
67+
}
68+
}
69+
70+
func `as`<T>(_ t: T.Type = T.self) -> T? {
71+
_associatedValue as? T
72+
}
73+
}

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,14 +1026,19 @@ extension Source {
10261026
/// of a '-' character followed by an atom.
10271027
mutating func lexCustomCharClassRangeEnd(
10281028
priorGroupCount: Int
1029-
) throws -> AST.Atom? {
1029+
) throws -> (dashLoc: SourceLocation, AST.Atom)? {
10301030
// Make sure we don't have a binary operator e.g. '--', and the '-' is not
10311031
// ending the custom character class (in which case it is literal).
1032+
let start = currentPosition
10321033
guard peekCCBinOp() == nil && !starts(with: "-]") && tryEat("-") else {
10331034
return nil
10341035
}
1035-
return try lexAtom(isInCustomCharacterClass: true,
1036-
priorGroupCount: priorGroupCount)
1036+
let dashLoc = Location(start ..< currentPosition)
1037+
guard let end = try lexAtom(isInCustomCharacterClass: true,
1038+
priorGroupCount: priorGroupCount) else {
1039+
return nil
1040+
}
1041+
return (dashLoc, end)
10371042
}
10381043
}
10391044

Sources/_MatchingEngine/Regex/Parse/Parse.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -269,14 +269,14 @@ extension Parser {
269269
else { break }
270270

271271
// Range between atoms.
272-
if let rhs = try source.lexCustomCharClassRangeEnd(
272+
if let (dashLoc, rhs) = try source.lexCustomCharClassRangeEnd(
273273
priorGroupCount: priorGroupCount
274274
) {
275275
guard atom.literalCharacterValue != nil &&
276276
rhs.literalCharacterValue != nil else {
277277
throw ParseError.invalidCharacterClassRangeOperand
278278
}
279-
members.append(.range(atom, rhs))
279+
members.append(.range(.init(atom, dashLoc, rhs)))
280280
continue
281281
}
282282

Sources/_MatchingEngine/Regex/Printing/DumpAST.swift

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,10 +185,15 @@ extension AST.CustomCharacterClass.Member: _ASTPrintable {
185185
switch self {
186186
case .custom(let cc): return "\(cc)"
187187
case .atom(let a): return "\(a)"
188-
case .range(let lhs, let rhs):
189-
return "range \(lhs) to \(rhs)"
188+
case .range(let r): return "\(r)"
190189
case .setOperation(let lhs, let op, let rhs):
191190
return "op \(lhs) \(op.value) \(rhs)"
192191
}
193192
}
194193
}
194+
195+
extension AST.CustomCharacterClass.Range: _ASTPrintable {
196+
public var _dumpBase: String {
197+
"\(lhs)-\(rhs)"
198+
}
199+
}

Sources/_MatchingEngine/Regex/Printing/PrintAsCanonical.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ extension PrettyPrinter {
8585
switch member {
8686
case .custom(let ccc):
8787
outputAsCanonical(ccc)
88-
case .range(let a, let b):
89-
output(a._canonicalBase)
88+
case .range(let r):
89+
output(r.lhs._canonicalBase)
9090
output("-")
91-
output(b._canonicalBase)
91+
output(r.rhs._canonicalBase)
9292
case .atom(let a):
9393
output(a._canonicalBase)
9494
case .setOperation:

Sources/_MatchingEngine/Regex/Printing/PrintAsPattern.swift

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,16 @@ extension PrettyPrinter {
139139
switch member {
140140
case .custom(let ccc):
141141
printAsPattern(ccc)
142-
case .range(let a, let b):
143-
if let lhs = a.literalStringValue,
144-
let rhs = b.literalStringValue {
142+
case .range(let r):
143+
if let lhs = r.lhs.literalStringValue,
144+
let rhs = r.rhs.literalStringValue {
145145
indent()
146146
output(lhs._quoted)
147147
output("...")
148148
output(rhs._quoted)
149149
terminateLine()
150150
} else {
151-
print("// TODO: Range \(a) to \(b)")
151+
print("// TODO: Range \(r.lhs) to \(r.rhs)")
152152
}
153153
case .atom(let a):
154154
if let s = a.literalStringValue {

Sources/_StringProcessing/ASTBuilder.swift

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,13 @@ func prop_m(
263263
) -> AST.CustomCharacterClass.Member {
264264
atom_m(.property(.init(kind, isInverted: inverted, isPOSIX: false)))
265265
}
266+
func range_m(
267+
_ lower: AST.Atom, _ upper: AST.Atom
268+
) -> AST.CustomCharacterClass.Member {
269+
.range(.init(lower, .fake, upper))
270+
}
266271
func range_m(
267272
_ lower: AST.Atom.Kind, _ upper: AST.Atom.Kind
268273
) -> AST.CustomCharacterClass.Member {
269-
.range(atom_a(lower), atom_a(upper))
274+
range_m(atom_a(lower), atom_a(upper))
270275
}
271-

Sources/_StringProcessing/CharacterClass.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -384,10 +384,10 @@ extension AST.CustomCharacterClass {
384384
return nil
385385
}
386386
result.append(.characterClass(cc))
387-
case .range(let lhs, let rhs):
387+
case .range(let r):
388388
result.append(.range(
389-
lhs.literalCharacterValue! ...
390-
rhs.literalCharacterValue!))
389+
r.lhs.literalCharacterValue! ...
390+
r.rhs.literalCharacterValue!))
391391

392392
case .atom(let a):
393393
if let cc = a.characterClass {

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -117,12 +117,12 @@ extension AST.CustomCharacterClass.Member {
117117
case .custom(let ccc):
118118
return try ccc.generateConsumer(opts)
119119

120-
case .range(let lower, let upper):
121-
guard let lhs = lower.literalCharacterValue else {
122-
throw unsupported("\(lower) in range")
120+
case .range(let r):
121+
guard let lhs = r.lhs.literalCharacterValue else {
122+
throw unsupported("\(r.lhs) in range")
123123
}
124-
guard let rhs = upper.literalCharacterValue else {
125-
throw unsupported("\(upper) in range")
124+
guard let rhs = r.rhs.literalCharacterValue else {
125+
throw unsupported("\(r.rhs) in range")
126126
}
127127

128128
return { input, bounds in

Tests/RegexTests/ParseTests.swift

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,18 +278,18 @@ extension RegexTests {
278278
"[-|$^:?+*())(*-+-]",
279279
charClass(
280280
"-", "|", "$", "^", ":", "?", "+", "*", "(", ")", ")",
281-
"(", .range("*", "+"), "-"))
281+
"(", range_m("*", "+"), "-"))
282282

283283
parseTest(
284-
"[a-b-c]", charClass(.range("a", "b"), "-", "c"))
284+
"[a-b-c]", charClass(range_m("a", "b"), "-", "c"))
285285

286286
parseTest("[-a-]", charClass("-", "a", "-"))
287287

288-
parseTest("[a-z]", charClass(.range("a", "z")))
288+
parseTest("[a-z]", charClass(range_m("a", "z")))
289289

290290
// FIXME: AST builder helpers for custom char class types
291291
parseTest("[a-d--a-c]", charClass(
292-
.setOperation([.range("a", "d")], .init(faking: .subtraction), [.range("a", "c")])
292+
.setOperation([range_m("a", "d")], .init(faking: .subtraction), [range_m("a", "c")])
293293
))
294294

295295
parseTest("[-]", charClass("-"))
@@ -933,6 +933,12 @@ extension RegexTests {
933933
rangeTest("a|", range(1 ..< 2), at: { $0.as(Alt.self)!.pipes[0] })
934934
rangeTest("a|b", range(1 ..< 2), at: { $0.as(Alt.self)!.pipes[0] })
935935
rangeTest("|||", range(1 ..< 2), at: { $0.as(Alt.self)!.pipes[1] })
936+
937+
// MARK: Custom character classes
938+
939+
rangeTest("[a-z]", range(2 ..< 3), at: {
940+
$0.as(CustomCC.self)!.members[0].as(CustomCC.Range.self)!.dashLoc
941+
})
936942
}
937943

938944
func testParseErrors() {

0 commit comments

Comments
 (0)