9
9
//
10
10
//===----------------------------------------------------------------------===//
11
11
12
- // TODO: mock up multi-line soon
13
-
14
- enum Delimiter: Hashable, CaseIterable {
15
- case traditional
16
- case experimental
17
- case reSingleQuote
18
- case rxSingleQuote
19
-
20
- var openingAndClosing: (opening: String, closing: String) {
21
- switch self {
22
- case .traditional: return ("#/", "/#")
23
- case .experimental: return ("#|", "|#")
24
- case .reSingleQuote: return ("re'", "'")
25
- case .rxSingleQuote: return ("rx'", "'")
12
/// A regex literal delimiter: a base `Kind` together with the number of `#`
/// characters that wrap it (e.g. `##/.../##` is `.forwardSlash` with a pound
/// count of 2).
struct Delimiter: Hashable {
  /// The base delimiter, without any extended pound signs.
  let kind: Kind

  /// The number of `#` characters placed before the opening delimiter and
  /// after the closing delimiter.
  let poundCount: Int

  /// Creates a delimiter of the given kind wrapped in `poundCount` pounds.
  ///
  /// - Precondition: `poundCount` is non-negative, and is zero unless
  ///   `kind.allowsExtendedPoundSyntax` is true.
  init(_ kind: Kind, poundCount: Int) {
    // Reject a negative count up front instead of letting `opening` or
    // `closing` trap later inside `String(repeating:count:)`.
    precondition(poundCount >= 0, "poundCount must be non-negative")
    precondition(kind.allowsExtendedPoundSyntax || poundCount == 0)
    self.kind = kind
    self.poundCount = poundCount
  }

  /// The full opening delimiter text: the pounds followed by the kind's
  /// opening.
  var opening: String {
    String(repeating: "#", count: poundCount) + kind.opening
  }

  /// The full closing delimiter text: the kind's closing followed by the
  /// pounds.
  var closing: String {
    kind.closing + String(repeating: "#", count: poundCount)
  }

  /// Whether or not multi-line mode is permitted.
  ///
  /// Only a forward-slash delimiter with at least one pound permits it.
  var allowsMultiline: Bool {
    switch kind {
    case .forwardSlash:
      return poundCount > 0
    case .experimental, .reSingleQuote, .rxSingleQuote:
      return false
    }
  }

  /// The delimiters which are currently enabled.
  static var enabledDelimiters: [Kind] { [.forwardSlash] }

  /// All known delimiters.
  static var allDelimiters: [Kind] { Kind.allCases }
}

extension Delimiter {
  /// The base form of a delimiter, independent of any surrounding pounds.
  enum Kind: Hashable, CaseIterable {
    case forwardSlash
    case experimental
    case reSingleQuote
    case rxSingleQuote

    /// The opening and closing text for this kind.
    var openingAndClosing: (opening: String, closing: String) {
      switch self {
      case .forwardSlash: return ("/", "/")
      case .experimental: return ("#|", "|#")
      case .reSingleQuote: return ("re'", "'")
      case .rxSingleQuote: return ("rx'", "'")
      }
    }

    /// The text that opens a literal of this kind.
    var opening: String { openingAndClosing.opening }

    /// The text that closes a literal of this kind.
    var closing: String { openingAndClosing.closing }

    /// Whether or not extended pound syntax, e.g. `##/.../##`, is allowed
    /// with this delimiter.
    var allowsExtendedPoundSyntax: Bool {
      switch self {
      case .forwardSlash:
        return true
      case .experimental, .reSingleQuote, .rxSingleQuote:
        return false
      }
    }
  }
}
41
76
42
77
struct DelimiterLexError: Error, CustomStringConvertible {
43
78
enum Kind: Hashable {
44
- case endOfString
79
+ case unterminated
45
80
case invalidUTF8 // TODO: better range reporting
46
81
case unknownDelimiter
47
82
case unprintableASCII
83
+ case multilineClosingNotOnNewline
48
84
}
49
85
50
86
var kind: Kind
@@ -59,10 +95,11 @@ struct DelimiterLexError: Error, CustomStringConvertible {
59
95
60
96
var description: String {
61
97
switch kind {
62
- case .endOfString : return "unterminated regex literal"
98
+ case .unterminated : return "unterminated regex literal"
63
99
case .invalidUTF8: return "invalid UTF-8 found in source file"
64
100
case .unknownDelimiter: return "unknown regex literal delimiter"
65
101
case .unprintableASCII: return "unprintable ASCII character found in source file"
102
+ case .multilineClosingNotOnNewline: return "closing delimiter must appear on new line"
66
103
}
67
104
}
68
105
}
@@ -72,11 +109,18 @@ fileprivate struct DelimiterLexer {
72
109
var cursor: UnsafeRawPointer
73
110
let end: UnsafeRawPointer
74
111
75
- init(start: UnsafeRawPointer, end: UnsafeRawPointer) {
112
+ var firstNewline: UnsafeRawPointer?
113
+ var isMultiline: Bool { firstNewline != nil }
114
+
115
+ let delimiters: [Delimiter.Kind]
116
+
117
/// Creates a lexer over the buffer bounded by `start` and `end` that will
/// only try to lex the given delimiter kinds.
///
/// The cursor begins at `start`.
///
/// - Precondition: `start <= end`.
init(
  start: UnsafeRawPointer,
  end: UnsafeRawPointer,
  delimiters: [Delimiter.Kind]
) {
  precondition(start <= end)
  self.delimiters = delimiters
  self.end = end
  self.start = start
  self.cursor = start
}
81
125
82
126
func ascii(_ s: Unicode.Scalar) -> UInt8 {
@@ -120,25 +164,34 @@ fileprivate struct DelimiterLexer {
120
164
precondition(cursor <= end, "Cannot advance past end")
121
165
}
122
166
123
- /// Check to see if a UTF-8 sequence can be eaten from the current cursor.
124
- func canEat(_ utf8: String.UTF8View ) -> Bool {
125
- guard let slice = slice(utf8 .count) else { return false }
126
- return slice.elementsEqual(utf8 )
167
/// Reports whether `bytes` matches the input at the current cursor, without
/// consuming anything.
///
/// Returns `false` when `slice(_:)` cannot produce `bytes.count` bytes, or
/// when the bytes it produces differ from `bytes`.
func canEat<C: Collection>(_ bytes: C) -> Bool where C.Element == UInt8 {
  if let upcoming = slice(bytes.count) {
    return upcoming.elementsEqual(bytes)
  }
  return false
}
128
172
129
- /// Attempt to eat a UTF-8 byte sequence, returning `true` if successful.
130
- mutating func tryEat(_ utf8: String.UTF8View) -> Bool {
131
- guard canEat(utf8) else { return false }
132
- advanceCursor(utf8.count)
173
/// Consumes `bytes` from the current cursor if they match the input there.
///
/// - Returns: `true` if the bytes matched and the cursor was advanced past
///   them; `false` (leaving the cursor untouched) otherwise.
mutating func tryEat<C: Collection>(
  _ bytes: C
) -> Bool where C.Element == UInt8 {
  let didMatch = canEat(bytes)
  if didMatch {
    advanceCursor(bytes.count)
  }
  return didMatch
}
181
+
182
/// Consumes the byte at the cursor if it equals the ASCII scalar `s`.
///
/// - Returns: `true` if the loaded byte matched and `advanceCursor()` was
///   called; `false` otherwise, including when `load()` yields no byte.
mutating func tryEat(ascii s: Unicode.Scalar) -> Bool {
  if load() == ascii(s) {
    advanceCursor()
    return true
  }
  return false
}
135
188
136
189
/// Attempt to skip over a closing delimiter character that is unlikely to be
137
190
/// the actual closing delimiter.
138
191
mutating func trySkipDelimiter(_ delimiter: Delimiter) {
139
192
// Only the closing `'` for re'...'/rx'...' can potentially be skipped over.
140
- switch delimiter {
141
- case .traditional , .experimental:
193
+ switch delimiter.kind {
194
+ case .forwardSlash , .experimental:
142
195
return
143
196
case .reSingleQuote, .rxSingleQuote:
144
197
break
@@ -222,12 +275,23 @@ fileprivate struct DelimiterLexer {
222
275
let contentsEnd = cursor
223
276
guard tryEat(delimiter.closing.utf8) else { return nil }
224
277
225
- // Form a string from the contents and make sure it's valid UTF-8.
226
278
let count = contentsEnd - contentsStart
227
279
let contents = UnsafeRawBufferPointer(
228
280
start: contentsStart, count: count)
229
- let s = String(decoding: contents, as: UTF8.self)
230
281
282
+ // In multi-line mode, we must be on a new line. So scan backwards and make
283
+ // sure we only have whitespace until the newline.
284
+ if isMultiline {
285
+ let idx = contents.lastIndex(
286
+ where: { $0 == ascii("\n") || $0 == ascii("\r") })! + 1
287
+ guard contents[idx...].all({ $0 == ascii(" ") || $0 == ascii("\t") })
288
+ else {
289
+ throw DelimiterLexError(.multilineClosingNotOnNewline, resumeAt: cursor)
290
+ }
291
+ }
292
+
293
+ // Form a string from the contents and make sure it's valid UTF-8.
294
+ let s = String(decoding: contents, as: UTF8.self)
231
295
guard s.utf8.elementsEqual(contents) else {
232
296
throw DelimiterLexError(.invalidUTF8, resumeAt: cursor)
233
297
}
@@ -238,7 +302,10 @@ fileprivate struct DelimiterLexer {
238
302
/// the end of the buffer is reached.
239
303
mutating func advance(escaped: Bool = false) throws {
240
304
guard let next = load() else {
241
- throw DelimiterLexError(.endOfString, resumeAt: cursor)
305
+ // We've hit the end of the buffer. In multi-line mode, we don't want to
306
+ // skip over what is likely otherwise valid Swift code, so resume from the
307
+ // first newline.
308
+ throw DelimiterLexError(.unterminated, resumeAt: firstNewline ?? cursor)
242
309
}
243
310
switch UnicodeScalar(next) {
244
311
case let next where !next.isASCII:
@@ -249,7 +316,10 @@ fileprivate struct DelimiterLexer {
249
316
advanceCursor()
250
317
251
318
case "\n", "\r":
252
- throw DelimiterLexError(.endOfString, resumeAt: cursor)
319
+ guard isMultiline else {
320
+ throw DelimiterLexError(.unterminated, resumeAt: cursor)
321
+ }
322
+ advanceCursor()
253
323
254
324
case "\0":
255
325
// TODO: Warn to match the behavior of String literal lexer? Or should
@@ -261,8 +331,12 @@ fileprivate struct DelimiterLexer {
261
331
advanceCursor()
262
332
try advance(escaped: true)
263
333
264
- case let next where !next.isPrintableASCII:
334
+ case let next
335
+ where !next.isPrintableASCII && !(isMultiline && next == "\t"):
265
336
// Diagnose unprintable ASCII.
337
+ // Note that tabs are allowed in multi-line literals.
338
+ // TODO: This matches the string literal behavior, but should we allow
339
+ // tabs for single-line regex literals too?
266
340
// TODO: Ideally we would recover and continue to lex until the ending
267
341
// delimiter.
268
342
throw DelimiterLexError(.unprintableASCII, resumeAt: cursor.successor())
@@ -272,17 +346,60 @@ fileprivate struct DelimiterLexer {
272
346
}
273
347
}
274
348
349
/// Tries each configured delimiter kind in order and consumes the first
/// opening delimiter that matches at the cursor.
///
/// - Parameter poundCount: The number of `#` characters the caller has
///   already consumed ahead of the cursor.
/// - Returns: The matched delimiter, or `nil` if no kind in `delimiters`
///   matches.
mutating func tryLexOpeningDelimiter(poundCount: Int) -> Delimiter? {
  for candidate in delimiters {
    let openingBytes = candidate.opening.utf8

    // When the kind supports extended pound syntax, or no pounds were
    // eaten, the opening text is lexed as-is and the pounds (if any) become
    // part of the resulting delimiter.
    let lexesOpeningVerbatim =
      candidate.allowsExtendedPoundSyntax || poundCount == 0

    if lexesOpeningVerbatim {
      if tryEat(openingBytes) {
        return Delimiter(candidate, poundCount: poundCount)
      }
    } else if
      // Otherwise the pounds already eaten must themselves be the leading
      // bytes of this kind's opening text (e.g. the `#` of `#|`), and the
      // remainder of the opening must follow at the cursor.
      poundCount < openingBytes.count,
      openingBytes.prefix(poundCount)
        .elementsEqual(repeatElement(ascii("#"), count: poundCount)),
      tryEat(openingBytes.dropFirst(poundCount))
    {
      return Delimiter(candidate, poundCount: 0)
    }
  }
  return nil
}
372
+
275
373
/*consuming*/ mutating func lex(
276
374
) throws -> (contents: String, Delimiter, end: UnsafeRawPointer) {
375
+ // We can consume any number of pound signs.
376
+ var poundCount = 0
377
+ while tryEat(ascii: "#") {
378
+ poundCount += 1
379
+ }
277
380
278
381
// Try to lex the opening delimiter.
279
- guard let delimiter = Delimiter.allCases.first(
280
- where: { tryEat($0.opening.utf8) }
281
- ) else {
382
+ guard let delimiter = tryLexOpeningDelimiter(poundCount: poundCount) else {
282
383
throw DelimiterLexError(.unknownDelimiter, resumeAt: cursor.successor())
283
384
}
284
-
285
385
let contentsStart = cursor
386
+
387
+ // If the delimiter allows multi-line, try skipping over any whitespace to a
388
+ // newline character. If we can do that, we enter multi-line mode.
389
+ if delimiter.allowsMultiline {
390
+ while let next = load() {
391
+ switch next {
392
+ case ascii(" "), ascii("\t"):
393
+ advanceCursor()
394
+ continue
395
+ case ascii("\n"), ascii("\r"):
396
+ firstNewline = cursor
397
+ default:
398
+ break
399
+ }
400
+ break
401
+ }
402
+ }
286
403
while true {
287
404
// Check to see if we're at a character that looks like a delimiter, but
288
405
// likely isn't. In such a case, we can attempt to skip over it.
@@ -302,20 +419,34 @@ fileprivate struct DelimiterLexer {
302
419
/// Drop a set of regex delimiters from the input string, returning the contents
303
420
/// and the delimiter used. The input string must have valid delimiters.
304
421
func droppingRegexDelimiters(_ str: String) -> (String, Delimiter) {
305
- func stripDelimiter(_ delim: Delimiter) -> String? {
422
/// Attempts to strip a delimiter of the given kind from the captured `str`.
///
/// - Returns: The contents between the delimiters together with the
///   delimiter that was stripped, or `nil` if `str` does not begin with this
///   kind's opening delimiter.
func stripDelimiter(_ kind: Delimiter.Kind) -> (String, Delimiter)? {
  var remaining = str.utf8[...]

  // Try to strip any number of opening '#'s, but only for kinds that support
  // extended pound syntax.
  var poundCount = 0
  if kind.allowsExtendedPoundSyntax {
    let pound = UInt8(ascii: "#")
    poundCount = remaining.prefix(while: { $0 == pound }).count
    remaining = remaining.dropFirst(poundCount)
  }

  // The opening delimiter is mandatory.
  guard var contents = remaining.tryDropPrefix(kind.opening.utf8) else {
    return nil
  }

  // The closing delimiter is optional, since it may be missing in invalid
  // code.
  let delim = Delimiter(kind, poundCount: poundCount)
  if let trimmed = contents.tryDropSuffix(delim.closing.utf8) {
    contents = trimmed
  }

  // Decoding must round-trip exactly; anything else means the remaining
  // bytes were not valid UTF-8.
  let result = String(decoding: contents, as: UTF8.self)
  precondition(result.utf8.elementsEqual(contents))
  return (result, delim)
}
317
- for d in Delimiter.allCases {
318
- if let contents = stripDelimiter(d ) {
448
+ for kind in Delimiter.allDelimiters {
449
+ if let ( contents, d) = stripDelimiter(kind ) {
319
450
return (contents, d)
320
451
}
321
452
}
@@ -325,8 +456,9 @@ func droppingRegexDelimiters(_ str: String) -> (String, Delimiter) {
325
456
/// Lexes a regex literal from the buffer bounded by `start` and `end`.
///
/// - Parameters:
///   - start: Pointer at which lexing begins.
///   - end: Pointer marking the end of the buffer.
///   - delimiters: The delimiter kinds to try; defaults to
///     `Delimiter.enabledDelimiters`.
/// - Returns: The literal's contents, the delimiter that was lexed, and the
///   pointer from which to resume lexing.
/// - Throws: Whatever error `DelimiterLexer.lex()` throws.
func lexRegex(
  start: UnsafeRawPointer,
  end: UnsafeRawPointer,
  delimiters: [Delimiter.Kind] = Delimiter.enabledDelimiters
) throws -> (contents: String, Delimiter, end: UnsafeRawPointer) {
  var lexer = DelimiterLexer(
    start: start, end: end, delimiters: delimiters)
  let lexed = try lexer.lex()
  return lexed
}
0 commit comments