@@ -51,28 +51,52 @@ Lexical analysis provides the following:
51
51
52
52
*/
53
53
54
- private struct Parser {
55
- var source : Source
56
-
57
- /// Tracks the number of parent custom character classes to allow us to
58
- /// determine whether or not to lex with custom character class syntax.
59
- fileprivate var customCharacterClassDepth = 0
54
/// State accumulated while parsing that is handed to the lexing routines so
/// they can disambiguate context-sensitive syntax.
struct ParsingContext {
  /// `true` while the parser is inside a custom character class.
  var isInCustomCharacterClass = false

  /// The number of group openings seen so far. Used to decide whether the
  /// '\n' syntax is a backreference or an octal sequence.
  fileprivate var priorGroupCount = 0

  /// The names of every named group recorded so far.
  fileprivate var usedGroupNames: Set<String> = []

  private init() {}

  /// A fresh context: no groups recorded, not inside a custom character class.
  static var none: ParsingContext { ParsingContext() }

  /// Record that a group of kind `g` has been opened, bumping the group count
  /// and remembering its name if it has one.
  fileprivate mutating func recordGroup(_ g: AST.Group.Kind) {
    // TODO: Needs to track group number resets (?|...).
    priorGroupCount += 1
    guard let groupName = g.name else { return }
    usedGroupNames.insert(groupName)
  }

  /// Check whether a given reference refers to a prior group.
  ///
  /// - An absolute reference is prior when its number does not exceed the
  ///   count of groups recorded so far.
  /// - A relative reference is prior when its offset is negative.
  /// - A named reference is prior when the name has already been recorded.
  func isPriorGroupRef(_ ref: AST.Atom.Reference.Kind) -> Bool {
    switch ref {
    case let .absolute(number):
      return number <= priorGroupCount
    case let .relative(offset):
      return offset < 0
    case let .named(name):
      return usedGroupNames.contains(name)
    }
  }
}
88
+
89
/// Parser state: the source being consumed together with the context that is
/// passed along to the lexing routines.
private struct Parser {
  /// The input being parsed.
  var source: Source

  /// Group and custom-character-class bookkeeping; starts out as `.none`.
  var context = ParsingContext.none

  /// Create a parser over `source` with an empty parsing context.
  init(_ source: Source) {
    self.source = source
  }
}
69
97
70
98
// Diagnostics
71
99
extension Parser {
72
- private var isInCustomCharacterClass : Bool {
73
- customCharacterClassDepth > 0
74
- }
75
-
76
100
mutating func report(
77
101
_ str: String , _ function: String = #function, _ line: Int = #line
78
102
) throws -> Never {
@@ -172,6 +196,20 @@ extension Parser {
172
196
return . concatenation( . init( result, loc ( _start) ) )
173
197
}
174
198
199
/// Perform a recursive parse for the body of a group.
///
/// The group is recorded in `context` *before* its body is parsed, so
/// references lexed inside the body already see this group as a prior group.
///
/// - Parameters:
///   - start: The source position at which the group began; used to compute
///     the location of the resulting node.
///   - kind: The located group kind produced by lexing the group start.
/// - Returns: The group node wrapping the parsed child.
/// - Throws: Any error thrown while parsing the body, or while expecting the
///   closing ')' of an explicitly scoped group.
mutating func parseGroupBody(
  start: Source.Position, _ kind: AST.Located<AST.Group.Kind>
) throws -> AST.Group {
  context.recordGroup(kind.value)

  let child = try parse()
  // An implicit scoped group has already consumed its closing paren, so only
  // explicitly scoped groups still need one. The expected text is the bare
  // paren with no surrounding whitespace.
  if !kind.value.hasImplicitScope {
    try source.expect(")")
  }
  return .init(kind, child, loc(start))
}
212
+
175
213
/// Parse a (potentially quantified) component
176
214
///
177
215
/// QuantOperand -> Group | CustomCharClass | Atom
@@ -182,24 +220,18 @@ extension Parser {
182
220
183
221
let _start = source. currentPosition
184
222
223
+ // Check if we have the start of a group '('.
185
224
if let kind = try source. lexGroupStart ( ) {
186
- priorGroupCount += 1
187
- let child = try parse ( )
188
- // An implicit scoped group has already consumed its closing paren.
189
- if !kind. value. hasImplicitScope {
190
- try source. expect ( " ) " )
191
- }
192
- return . group( . init( kind, child, loc ( _start) ) )
225
+ return . group( try parseGroupBody ( start: _start, kind) )
193
226
}
227
+
228
+ // Check if we have the start of a custom character class '['.
194
229
if let cccStart = try source. lexCustomCCStart ( ) {
195
230
return . customCharacterClass(
196
231
try parseCustomCharacterClass ( cccStart) )
197
232
}
198
233
199
- if let atom = try source. lexAtom (
200
- isInCustomCharacterClass: isInCustomCharacterClass,
201
- priorGroupCount: priorGroupCount
202
- ) {
234
+ if let atom = try source. lexAtom ( context: context) {
203
235
// TODO: track source locations
204
236
return . atom( atom)
205
237
}
@@ -224,6 +256,10 @@ extension Parser {
224
256
mutating func parseCustomCharacterClass(
225
257
_ start: Source . Located < CustomCC . Start >
226
258
) throws -> CustomCC {
259
+ let alreadyInCCC = context. isInCustomCharacterClass
260
+ context. isInCustomCharacterClass = true
261
+ defer { context. isInCustomCharacterClass = alreadyInCCC }
262
+
227
263
typealias Member = CustomCC . Member
228
264
try source. expectNonEmpty ( )
229
265
@@ -279,14 +315,11 @@ extension Parser {
279
315
continue
280
316
}
281
317
282
- guard let atom = try source. lexAtom (
283
- isInCustomCharacterClass: true , priorGroupCount: priorGroupCount)
284
- else { break }
318
+ guard let atom = try source. lexAtom ( context: context) else { break }
285
319
286
320
// Range between atoms.
287
- if let ( dashLoc, rhs) = try source. lexCustomCharClassRangeEnd (
288
- priorGroupCount: priorGroupCount
289
- ) {
321
+ if let ( dashLoc, rhs) =
322
+ try source. lexCustomCharClassRangeEnd ( context: context) {
290
323
guard atom. literalCharacterValue != nil &&
291
324
rhs. literalCharacterValue != nil else {
292
325
throw ParseError . invalidCharacterClassRangeOperand
0 commit comments