@@ -865,8 +865,6 @@ extension Source {
865
865
return . recursionCheck
866
866
}
867
867
868
- // TODO: Oniguruma can also parse an additional recursion level for
869
- // group-matched checks.
870
868
if let open = src. tryEat ( anyOf: " < " , " ' " ) {
871
869
// In PCRE, this can only be a named reference. In Oniguruma, it can
872
870
// also be a numbered reference.
@@ -884,9 +882,9 @@ extension Source {
884
882
}
885
883
886
884
// If we have a numbered reference, this is a check to see if a group
887
- // matched.
888
- if let numRef = try src. lexNumberedReference ( ) {
889
- return . groupMatched( numRef )
885
+ // matched. Oniguruma also permits a recursion level here.
886
+ if let num = try src. lexNumberedReference ( allowRecursionLevel : true ) {
887
+ return . groupMatched( num )
890
888
}
891
889
892
890
// PCRE and .NET also allow a named reference to be parsed here. PCRE
@@ -896,9 +894,9 @@ extension Source {
896
894
// FIXME: This should apply to future groups too.
897
895
// TODO: We should probably advise users to use the more explicit
898
896
// syntax.
899
- if let nameRef = src. lexNamedReference ( endingWith : " ) " ,
900
- eatEnding: false ) ,
901
- context. isPriorGroupRef ( nameRef. kind) {
897
+ let nameRef = src. lexNamedReference (
898
+ endingWith : " ) " , eatEnding: false , allowRecursionLevel : true )
899
+ if let nameRef = nameRef , context. isPriorGroupRef ( nameRef. kind) {
902
900
return . groupMatched( nameRef)
903
901
}
904
902
return nil
@@ -1052,10 +1050,10 @@ extension Source {
1052
1050
1053
1051
/// Try to lex an absolute or relative numbered reference.
1054
1052
///
1055
- /// NumberRef -> ('+' | '-')? <Decimal Number>
1053
+ /// NumberRef -> ('+' | '-')? <Decimal Number> RecursionLevel?
1056
1054
///
1057
1055
private mutating func lexNumberedReference(
1058
- allowWholePatternRef: Bool = false
1056
+ allowWholePatternRef: Bool = false , allowRecursionLevel : Bool = false
1059
1057
) throws -> AST . Reference ? {
1060
1058
let kind = try recordLoc { src -> AST . Reference . Kind ? in
1061
1059
// Note this logic should match canLexNumberedReference.
@@ -1074,7 +1072,22 @@ extension Source {
1074
1072
guard allowWholePatternRef || kind. value != . recurseWholePattern else {
1075
1073
throw ParseError . cannotReferToWholePattern
1076
1074
}
1077
- return . init( kind. value, innerLoc: kind. location)
1075
+ let recLevel = allowRecursionLevel ? try lexRecursionLevel ( ) : nil
1076
+ let loc = recLevel? . location. union ( with: kind. location) ?? kind. location
1077
+ return . init( kind. value, recursionLevel: recLevel, innerLoc: loc)
1078
+ }
1079
+
1080
+ /// Try to consume a recursion level for a group reference.
1081
+ ///
1082
+ /// RecursionLevel -> '+' <Int> | '-' <Int>
1083
+ ///
1084
+ private mutating func lexRecursionLevel(
1085
+ ) throws -> Located < Int > ? {
1086
+ try recordLoc { src in
1087
+ if src. tryEat ( " + " ) { return try src. expectNumber ( ) . value }
1088
+ if src. tryEat ( " - " ) { return try - src. expectNumber ( ) . value }
1089
+ return nil
1090
+ }
1078
1091
}
1079
1092
1080
1093
/// Checks whether a numbered reference can be lexed.
@@ -1087,19 +1100,34 @@ extension Source {
1087
1100
1088
1101
/// Eat a named reference up to a given closing delimiter.
1089
1102
private mutating func expectNamedReference(
1090
- endingWith end: String , eatEnding: Bool = true
1103
+ endingWith end: String , eatEnding: Bool = true ,
1104
+ allowRecursionLevel: Bool = false
1091
1105
) throws -> AST . Reference {
1092
- let str = try expectGroupName ( endingWith: end, eatEnding: eatEnding)
1093
- return . init( . named( str. value) , innerLoc: str. location)
1106
+ // Note we don't want to eat the ending as we may also want to parse a
1107
+ // recursion level.
1108
+ let str = try expectGroupName ( endingWith: end, eatEnding: false )
1109
+
1110
+ // If we're allowed to, try to parse a recursion level.
1111
+ let recLevel = allowRecursionLevel ? try lexRecursionLevel ( ) : nil
1112
+ let loc = recLevel? . location. union ( with: str. location) ?? str. location
1113
+
1114
+ if eatEnding {
1115
+ try expect ( sequence: end)
1116
+ }
1117
+ return . init( . named( str. value) , recursionLevel: recLevel, innerLoc: loc)
1094
1118
}
1095
1119
1096
1120
/// Try to consume a named reference up to a closing delimiter, returning
1097
1121
/// `nil` if the characters aren't valid for a named reference.
1098
1122
private mutating func lexNamedReference(
1099
- endingWith end: String , eatEnding: Bool = true
1123
+ endingWith end: String , eatEnding: Bool = true ,
1124
+ allowRecursionLevel: Bool = false
1100
1125
) -> AST . Reference ? {
1101
1126
tryEating { src in
1102
- try ? src. expectNamedReference ( endingWith: end, eatEnding: eatEnding)
1127
+ try ? src. expectNamedReference (
1128
+ endingWith: end, eatEnding: eatEnding,
1129
+ allowRecursionLevel: allowRecursionLevel
1130
+ )
1103
1131
}
1104
1132
}
1105
1133
@@ -1109,17 +1137,22 @@ extension Source {
1109
1137
///
1110
1138
private mutating func expectNamedOrNumberedReference(
1111
1139
endingWith ending: String , eatEnding: Bool = true ,
1112
- allowWholePatternRef: Bool = false
1140
+ allowWholePatternRef: Bool = false , allowRecursionLevel : Bool = false
1113
1141
) throws -> AST . Reference {
1114
- if let numbered = try lexNumberedReference (
1115
- allowWholePatternRef: allowWholePatternRef
1116
- ) {
1142
+ let num = try lexNumberedReference (
1143
+ allowWholePatternRef: allowWholePatternRef,
1144
+ allowRecursionLevel: allowRecursionLevel
1145
+ )
1146
+ if let num = num {
1117
1147
if eatEnding {
1118
1148
try expect ( sequence: ending)
1119
1149
}
1120
- return numbered
1150
+ return num
1121
1151
}
1122
- return try expectNamedReference ( endingWith: ending, eatEnding: eatEnding)
1152
+ return try expectNamedReference (
1153
+ endingWith: ending, eatEnding: eatEnding,
1154
+ allowRecursionLevel: allowRecursionLevel
1155
+ )
1123
1156
}
1124
1157
1125
1158
private static func getClosingDelimiter(
@@ -1176,11 +1209,21 @@ extension Source {
1176
1209
}
1177
1210
1178
1211
if src. tryEat ( " k " ) {
1179
- // Perl/.NET-style backreferences.
1180
- if let openChar = src. tryEat ( anyOf: " < " , " ' " , " { " ) {
1212
+ // Perl/.NET/Oniguruma -style backreferences.
1213
+ if let openChar = src. tryEat ( anyOf: " < " , " ' " ) {
1181
1214
let closing = String ( Source . getClosingDelimiter ( for: openChar) )
1215
+
1216
+ // Perl only accepts named references here, but Oniguruma and .NET
1217
+ // also accept numbered references. This shouldn't be an ambiguity
1218
+ // as named references may not begin with a digit, '-', or '+'.
1219
+ // Oniguruma also allows a recursion level to be specified.
1220
+ return . backreference( try src. expectNamedOrNumberedReference (
1221
+ endingWith: closing, allowRecursionLevel: true ) )
1222
+ }
1223
+ // Perl/.NET also allow named references with the '{' delimiter.
1224
+ if src. tryEat ( " { " ) {
1182
1225
return . backreference(
1183
- try src. expectNamedReference ( endingWith: closing ) )
1226
+ try src. expectNamedReference ( endingWith: " } " ) )
1184
1227
}
1185
1228
return nil
1186
1229
}
@@ -1199,10 +1242,10 @@ extension Source {
1199
1242
// here.
1200
1243
if firstChar != " 0 " , let numAndLoc = try src. lexNumber ( ) {
1201
1244
let num = numAndLoc. value
1202
- let loc = numAndLoc. location
1245
+ let ref = AST . Reference ( . absolute ( num ) , innerLoc : numAndLoc. location)
1203
1246
if num < 10 || firstChar == " 8 " || firstChar == " 9 " ||
1204
- context. isPriorGroupRef ( . absolute ( num ) ) {
1205
- return . backreference( . init ( . absolute ( num ) , innerLoc : loc ) )
1247
+ context. isPriorGroupRef ( ref . kind ) {
1248
+ return . backreference( ref )
1206
1249
}
1207
1250
return nil
1208
1251
}
@@ -1225,7 +1268,6 @@ extension Source {
1225
1268
try recordLoc { src in
1226
1269
try src. tryEating { src in
1227
1270
guard src. tryEat ( sequence: " (? " ) else { return nil }
1228
- let _start = src. currentPosition
1229
1271
1230
1272
// Note the below should be covered by canLexGroupLikeReference.
1231
1273
@@ -1243,8 +1285,7 @@ extension Source {
1243
1285
}
1244
1286
1245
1287
// Whole-pattern recursion, which is equivalent to (?0).
1246
- if src. tryEat ( " R " ) {
1247
- let loc = Location ( _start ..< src. currentPosition)
1288
+ if let loc = src. tryEatWithLoc ( " R " ) {
1248
1289
try src. expect ( " ) " )
1249
1290
return . subpattern( . init( . recurseWholePattern, innerLoc: loc) )
1250
1291
}
0 commit comments