Skip to content

Commit 6e4c2bd

Browse files
committed
Cleanup
1 parent 5359e31 commit 6e4c2bd

File tree

6 files changed: 182 additions (+182) and 186 deletions (-186)

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 72 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -74,75 +74,81 @@ fileprivate extension Compiler.ByteCodeGen {
7474
}
7575
}
7676

77-
mutating func emitQuotedLiteral(_ s: String) {
78-
if options.semanticLevel == .graphemeCluster {
79-
if options.isCaseInsensitive {
80-
// future work: if all ascii, emit matchBitset instructions with
81-
// case insensitive bitsets
82-
83-
// TODO: buildCaseInsensitiveMatchSequence(c) or alternative
84-
builder.buildConsume { input, bounds in
85-
var iterator = s.makeIterator()
86-
var currentIndex = bounds.lowerBound
87-
while let ch = iterator.next() {
88-
guard currentIndex < bounds.upperBound,
89-
ch.lowercased() == input[currentIndex].lowercased()
90-
else { return nil }
91-
input.formIndex(after: &currentIndex)
92-
}
93-
return currentIndex
77+
mutating func emitScalarQuotedLiteral(_ s: String) {
78+
precondition(options.semanticLevel == .unicodeScalar)
79+
if optimizationsEnabled && !options.isCaseInsensitive {
80+
// Match all scalars exactly, never boundary check because we're in
81+
// unicode scalars mode
82+
for char in s {
83+
for scalar in char.unicodeScalars {
84+
builder.buildMatchScalar(scalar, boundaryCheck: false)
9485
}
95-
} else {
96-
if optimizationsEnabled && s.allSatisfy({char in char.isASCII}) {
97-
for char in s.dropLast(1) {
98-
// Note: only cr-lf is multiple scalars
99-
for scalar in char.unicodeScalars {
100-
builder.buildMatchScalar(scalar, boundaryCheck: false)
101-
}
102-
}
103-
let lastChar = s.last!
104-
for scalar in lastChar.unicodeScalars {
105-
// Only boundary check if we are the last scalar in the last character
106-
// to make sure that there isn't a combining scalar after the quoted literal
107-
let boundaryCheck = scalar == lastChar.unicodeScalars.last!
108-
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
86+
}
87+
return
88+
}
89+
90+
builder.buildConsume {
91+
[caseInsensitive = options.isCaseInsensitive] input, bounds in
92+
// TODO: Case folding
93+
var iterator = s.unicodeScalars.makeIterator()
94+
var currentIndex = bounds.lowerBound
95+
while let scalar = iterator.next() {
96+
guard currentIndex < bounds.upperBound else { return nil }
97+
if caseInsensitive {
98+
if scalar.properties.lowercaseMapping != input.unicodeScalars[currentIndex].properties.lowercaseMapping {
99+
return nil
109100
}
110101
} else {
111-
builder.buildMatchSequence(s)
102+
if scalar != input.unicodeScalars[currentIndex] {
103+
return nil
104+
}
112105
}
106+
input.unicodeScalars.formIndex(after: &currentIndex)
113107
}
114-
} else {
115-
if optimizationsEnabled && !options.isCaseInsensitive {
116-
// Match all scalars exactly, never boundary check because we're in
117-
// unicode scalars mode
118-
for char in s {
119-
for scalar in char.unicodeScalars {
120-
builder.buildMatchScalar(scalar, boundaryCheck: false)
121-
}
108+
return currentIndex
109+
}
110+
}
111+
112+
mutating func emitQuotedLiteral(_ s: String) {
113+
guard options.semanticLevel == .graphemeCluster else {
114+
emitScalarQuotedLiteral(s)
115+
return
116+
}
117+
118+
if options.isCaseInsensitive {
119+
// future work: if all ascii, emit matchBitset instructions with
120+
// case insensitive bitsets
121+
// TODO: buildCaseInsensitiveMatchSequence(c) or alternative
122+
builder.buildConsume { input, bounds in
123+
var iterator = s.makeIterator()
124+
var currentIndex = bounds.lowerBound
125+
while let ch = iterator.next() {
126+
guard currentIndex < bounds.upperBound,
127+
ch.lowercased() == input[currentIndex].lowercased()
128+
else { return nil }
129+
input.formIndex(after: &currentIndex)
122130
}
123-
} else {
124-
builder.buildConsume {
125-
[caseInsensitive = options.isCaseInsensitive] input, bounds in
126-
// TODO: Case folding
127-
var iterator = s.unicodeScalars.makeIterator()
128-
var currentIndex = bounds.lowerBound
129-
while let scalar = iterator.next() {
130-
guard currentIndex < bounds.upperBound else { return nil }
131-
if caseInsensitive {
132-
if scalar.properties.lowercaseMapping != input.unicodeScalars[currentIndex].properties.lowercaseMapping {
133-
return nil
134-
}
135-
} else {
136-
if scalar != input.unicodeScalars[currentIndex] {
137-
return nil
138-
}
139-
}
140-
input.unicodeScalars.formIndex(after: &currentIndex)
141-
}
142-
return currentIndex
131+
return currentIndex
132+
}
133+
return
134+
}
135+
136+
if optimizationsEnabled && s.allSatisfy({char in char.isASCII}) {
137+
let lastIdx = s.unicodeScalars.indices.last!
138+
for idx in s.unicodeScalars.indices {
139+
if idx == lastIdx {
140+
// Only boundary check if we are the last scalar in the last character
141+
// to make sure that there isn't a combining scalar after the quoted literal
142+
builder.buildMatchScalar(s.unicodeScalars[idx], boundaryCheck: true)
143+
} else {
144+
builder.buildMatchScalar(s.unicodeScalars[idx], boundaryCheck: false)
143145
}
144146
}
147+
return
145148
}
149+
150+
builder.buildMatchSequence(s)
151+
return
146152
}
147153

148154
mutating func emitBackreference(
@@ -281,7 +287,6 @@ fileprivate extension Compiler.ByteCodeGen {
281287
}
282288

283289
mutating func emitScalar(_ s: UnicodeScalar) throws {
284-
// TODO: Native instruction buildMatchScalar(s)
285290
if options.isCaseInsensitive {
286291
// TODO: e.g. buildCaseInsensitiveMatchScalar(s)
287292
builder.buildConsume(by: consumeScalar {
@@ -290,17 +295,11 @@ fileprivate extension Compiler.ByteCodeGen {
290295
return
291296
}
292297

293-
if optimizationsEnabled { // should we just do this unconditionally?
294-
builder.buildMatchScalar(s, boundaryCheck: false)
295-
} else {
296-
builder.buildConsume(by: consumeScalar {
297-
$0 == s
298-
})
299-
}
298+
builder.buildMatchScalar(s, boundaryCheck: false)
300299
}
301300

302301
mutating func emitCharacter(_ c: Character) throws {
303-
// Unicode scalar matches the specific scalars that comprise a character
302+
// Unicode scalar mode matches the specific scalars that comprise a character
304303
if options.semanticLevel == .unicodeScalar {
305304
for scalar in c.unicodeScalars {
306305
try emitScalar(scalar)
@@ -317,12 +316,13 @@ fileprivate extension Compiler.ByteCodeGen {
317316
? input.index(after: bounds.lowerBound)
318317
: nil
319318
}
319+
return
320320
}
321321

322322
if optimizationsEnabled && c.isASCII {
323-
for scalar in c.unicodeScalars {
324-
let boundaryCheck = scalar == c.unicodeScalars.last!
325-
builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
323+
let lastIdx = c.unicodeScalars.indices.last!
324+
for idx in c.unicodeScalars.indices {
325+
builder.buildMatchScalar(c.unicodeScalars[idx], boundaryCheck: idx == lastIdx)
326326
}
327327
return
328328
}

Sources/_StringProcessing/ConsumerInterface.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
@_implementationOnly import _RegexParser
1313

1414
extension Character {
15-
var singleScalarAsciiValue: UInt8? {
15+
var _singleScalarAsciiValue: UInt8? {
1616
guard self != "\r\n" else { return nil }
1717
return asciiValue
1818
}
@@ -68,7 +68,7 @@ extension DSLTree.Atom {
6868
var singleScalarASCIIValue: UInt8? {
6969
switch self {
7070
case let .char(c):
71-
return c.singleScalarAsciiValue
71+
return c._singleScalarAsciiValue
7272
case let .scalar(s) where s.isASCII:
7373
return UInt8(ascii: s)
7474
case let .unconverted(atom):
@@ -222,7 +222,7 @@ extension AST.Atom {
222222
var singleScalarASCIIValue: UInt8? {
223223
switch kind {
224224
case let .char(c):
225-
return c.singleScalarAsciiValue
225+
return c._singleScalarAsciiValue
226226
case let .scalar(s) where s.value.isASCII:
227227
return UInt8(ascii: s.value)
228228
default:

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -231,18 +231,10 @@ extension Processor {
231231
currentPosition < end ? input.unicodeScalars[currentPosition] : nil
232232
}
233233

234-
func nextScalarIndex(offsetBy n: Int, boundaryCheck: Bool) -> Input.Index? {
235-
if let idx = input.unicodeScalars.index(currentPosition, offsetBy: 1, limitedBy: end),
236-
(!boundaryCheck || input.isOnGraphemeClusterBoundary(idx)) {
237-
return idx
238-
}
239-
return nil
240-
}
241-
242234
mutating func matchScalar(_ s: Unicode.Scalar, boundaryCheck: Bool) -> Bool {
243-
guard let curScalar = loadScalar(),
244-
curScalar == s,
245-
let idx = nextScalarIndex(offsetBy: 1, boundaryCheck: boundaryCheck)
235+
guard s == loadScalar(),
236+
let idx = input.unicodeScalars.index(currentPosition, offsetBy: 1, limitedBy: end),
237+
(!boundaryCheck || input.isOnGraphemeClusterBoundary(idx))
246238
else {
247239
signalFailure()
248240
return false
@@ -271,7 +263,7 @@ extension Processor {
271263
) -> Bool {
272264
guard let curScalar = loadScalar(),
273265
bitset.matches(scalar: curScalar),
274-
let idx = nextScalarIndex(offsetBy: 1, boundaryCheck: false) else {
266+
let idx = input.unicodeScalars.index(currentPosition, offsetBy: 1, limitedBy: end) else {
275267
signalFailure()
276268
return false
277269
}

Sources/_StringProcessing/Regex/DSLTree.swift

Lines changed: 0 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -164,104 +164,6 @@ extension DSLTree {
164164
indirect case subtraction(CustomCharacterClass, CustomCharacterClass)
165165
indirect case symmetricDifference(CustomCharacterClass, CustomCharacterClass)
166166
}
167-
168-
internal struct AsciiBitset {
169-
let isInverted: Bool
170-
var a: UInt64 = 0
171-
var b: UInt64 = 0
172-
173-
init(isInverted: Bool) {
174-
self.isInverted = isInverted
175-
}
176-
177-
init(_ val: UInt8, _ isInverted: Bool, _ isCaseInsensitive: Bool) {
178-
self.isInverted = isInverted
179-
add(val, isCaseInsensitive)
180-
}
181-
182-
init(low: UInt8, high: UInt8, isInverted: Bool, isCaseInsensitive: Bool) {
183-
self.isInverted = isInverted
184-
for val in low...high {
185-
add(val, isCaseInsensitive)
186-
}
187-
}
188-
189-
internal init(
190-
a: UInt64,
191-
b: UInt64,
192-
isInverted: Bool
193-
) {
194-
self.isInverted = isInverted
195-
self.a = a
196-
self.b = b
197-
}
198-
199-
internal mutating func add(_ val: UInt8, _ isCaseInsensitive: Bool) {
200-
setBit(val)
201-
if isCaseInsensitive {
202-
switch val {
203-
case 64...90: setBit(val + 32)
204-
case 97...122: setBit(val - 32)
205-
default: break
206-
}
207-
}
208-
}
209-
210-
internal mutating func setBit(_ val: UInt8) {
211-
if val < 64 {
212-
a = a | 1 << val
213-
} else {
214-
b = b | 1 << (val - 64)
215-
}
216-
}
217-
218-
private func matches(_ val: UInt8) -> Bool {
219-
if val < 64 {
220-
return (a >> val) & 1 == 1
221-
} else {
222-
return (b >> (val - 64)) & 1 == 1
223-
}
224-
}
225-
226-
internal func matches(char: Character) -> Bool {
227-
let matched: Bool
228-
if let val = char.singleScalarAsciiValue {
229-
matched = matches(val)
230-
} else {
231-
matched = false
232-
}
233-
234-
if isInverted {
235-
return !matched
236-
}
237-
return matched
238-
}
239-
240-
internal func matches(scalar: Unicode.Scalar) -> Bool {
241-
let matched: Bool
242-
if scalar.isASCII {
243-
let val = UInt8(ascii: scalar)
244-
matched = matches(val)
245-
} else {
246-
matched = false
247-
}
248-
249-
if isInverted {
250-
return !matched
251-
}
252-
return matched
253-
}
254-
255-
/// Joins another bitset from a Member of the same CustomCharacterClass
256-
internal func union(_ other: AsciiBitset) -> AsciiBitset {
257-
precondition(self.isInverted == other.isInverted)
258-
return AsciiBitset(
259-
a: self.a | other.a,
260-
b: self.b | other.b,
261-
isInverted: self.isInverted
262-
)
263-
}
264-
}
265167
}
266168

267169
@_spi(RegexBuilder)

0 commit comments

Comments
 (0)