Skip to content

Commit 01b9fdb

Browse files
committed
Add location info to Reference
1 parent 2c424b9 commit 01b9fdb

File tree

4 files changed

+57
-41
lines changed

4 files changed

+57
-41
lines changed

Sources/_MatchingEngine/Regex/AST/Atom.swift

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -382,34 +382,39 @@ extension AST.Atom.CharacterProperty {
382382
}
383383
}
384384

385+
extension AST.Atom {
386+
public struct Reference: Hashable {
387+
@frozen
388+
public enum Kind: Hashable {
389+
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
390+
// Oniguruma: \k<n>, \k'n'
391+
case absolute(Int)
392+
393+
// \g{-n} \g<+n> \g'+n' \g<-n> \g'-n' (?+n) (?-n)
394+
// (?(+n)... (?(-n)...
395+
// Oniguruma: \k<-n> \k<+n> \k'-n' \k'+n'
396+
case relative(Int)
397+
398+
// \k<name> \k'name' \g{name} \k{name} (?P=name)
399+
// \g<name> \g'name' (?&name) (?P>name)
400+
// (?(<name>)... (?('name')... (?(name)...
401+
case named(String)
402+
403+
// (?R) (?(R)...
404+
case recurseWholePattern
405+
}
406+
public var kind: Kind
385407

386-
// TODO: I haven't thought through this a bunch; this seems like
387-
// a sensible type to have and break down this way. But it could
388-
// easily get folded in with the kind of reference
389-
@frozen
390-
public enum Reference: Hashable {
391-
// \n \gn \g{n} \g<n> \g'n' (?n) (?(n)...
392-
// Oniguruma: \k<n>, \k'n'
393-
case absolute(Int)
394-
395-
// \g{-n} \g<+n> \g'+n' \g<-n> \g'-n' (?+n) (?-n)
396-
// (?(+n)... (?(-n)...
397-
// Oniguruma: \k<-n> \k<+n> \k'-n' \k'+n'
398-
case relative(Int)
399-
400-
// \k<name> \k'name' \g{name} \k{name} (?P=name)
401-
// \g<name> \g'name' (?&name) (?P>name)
402-
// (?(<name>)... (?('name')... (?(name)...
403-
case named(String)
404-
405-
// TODO: I'm not sure the below goes here
406-
//
407-
// ?(R) (?(R)...
408-
case recurseWholePattern
409-
}
410-
411-
408+
/// The location of the inner numeric or textual reference, e.g. the location
409+
/// of '-2' in '\g{-2}'.
410+
public var innerLoc: SourceLocation
412411

412+
public init(_ kind: Kind, innerLoc: SourceLocation) {
413+
self.kind = kind
414+
self.innerLoc = innerLoc
415+
}
416+
}
417+
}
413418

414419
extension AST.Atom {
415420
/// Anchors and other built-in zero-width assertions

Sources/_MatchingEngine/Regex/Parse/LexicalAnalysis.swift

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -825,8 +825,8 @@ extension Source {
825825
/// NumberRef -> ('+' | '-')? <Decimal Number>
826826
///
827827
private mutating func lexNumberedReference(
828-
) throws -> Located<Reference>? {
829-
try recordLoc { src in
828+
) throws -> AST.Atom.Reference? {
829+
let kind = try recordLoc { src -> AST.Atom.Reference.Kind? in
830830
if src.tryEat("+") {
831831
return .relative(try src.expectNumber().value)
832832
}
@@ -838,13 +838,16 @@ extension Source {
838838
}
839839
return nil
840840
}
841+
guard let kind = kind else { return nil }
842+
return .init(kind.value, innerLoc: kind.location)
841843
}
842844

843845
/// Eat a named reference up to a given closing delimiter.
844846
private mutating func expectNamedReference(
845847
endingWith end: String
846-
) throws -> Reference {
847-
.named(try expectQuoted(endingWith: end).value)
848+
) throws -> AST.Atom.Reference {
849+
let str = try expectQuoted(endingWith: end)
850+
return .init(.named(str.value), innerLoc: str.location)
848851
}
849852

850853
/// Try to lex a numbered reference, or otherwise a named reference.
@@ -853,10 +856,10 @@ extension Source {
853856
///
854857
private mutating func expectNamedOrNumberedReference(
855858
endingWith ending: String
856-
) throws -> Reference {
859+
) throws -> AST.Atom.Reference {
857860
if let numbered = try lexNumberedReference() {
858861
try expect(sequence: ending)
859-
return numbered.value
862+
return numbered
860863
}
861864
return try expectNamedReference(endingWith: ending)
862865
}
@@ -904,7 +907,7 @@ extension Source {
904907

905908
// PCRE allows \g followed by a bare numeric reference.
906909
if let ref = try src.lexNumberedReference() {
907-
return .backreference(ref.value)
910+
return .backreference(ref)
908911
}
909912

910913
// Fallback to a literal character. We need to return here as we've
@@ -942,8 +945,10 @@ extension Source {
942945
let num = try Source.validateNumber(digits.string, Int.self, .decimal)
943946
if num < 10 || digits.first == "8" || digits.first == "9" ||
944947
num <= priorGroupCount {
948+
let _start = src.currentPosition
945949
src.advance(digits.count)
946-
return .backreference(.absolute(num))
950+
return .backreference(
951+
.init(.absolute(num), innerLoc: Location(_start ..< src.currentPosition)))
947952
}
948953
}
949954
return nil

Sources/_MatchingEngine/Regex/Printing/DumpAST.swift

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,20 @@ extension AST.Atom {
107107
case .endOfLine: return "$"
108108

109109
case .backreference(let r), .subpattern(let r), .condition(let r):
110-
return "\(r)"
110+
return "\(r._dumpBase)"
111111

112112
case .char, .scalar:
113113
fatalError("Unreachable")
114114
}
115115
}
116116
}
117117

118+
extension AST.Atom.Reference: _ASTPrintable {
119+
public var _dumpBase: String {
120+
"\(kind)"
121+
}
122+
}
123+
118124
extension AST.Group.Kind: _ASTPrintable {
119125
public var _dumpBase: String {
120126
switch self {

Sources/_StringProcessing/ASTBuilder.swift

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -233,14 +233,14 @@ func scalar_m(_ s: Unicode.Scalar) -> AST.CustomCharacterClass.Member {
233233
atom_m(.scalar(s))
234234
}
235235

236-
func backreference(_ r: Reference) -> AST {
237-
atom(.backreference(r))
236+
func backreference(_ r: AST.Atom.Reference.Kind) -> AST {
237+
atom(.backreference(.init(r, innerLoc: .fake)))
238238
}
239-
func subpattern(_ r: Reference) -> AST {
240-
atom(.subpattern(r))
239+
func subpattern(_ r: AST.Atom.Reference.Kind) -> AST {
240+
atom(.subpattern(.init(r, innerLoc: .fake)))
241241
}
242-
func condition(_ r: Reference) -> AST {
243-
atom(.condition(r))
242+
func condition(_ r: AST.Atom.Reference.Kind) -> AST {
243+
atom(.condition(.init(r, innerLoc: .fake)))
244244
}
245245

246246
func prop(

0 commit comments

Comments
 (0)