Skip to content

Commit 097ffeb

Browse files
committed
Remove extra instructions and use payload bits instead
1 parent 1a359b4 commit 097ffeb

File tree

5 files changed

+77
-116
lines changed

5 files changed

+77
-116
lines changed

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ fileprivate extension Compiler.ByteCodeGen {
259259
// c.isCased ensures that c is not CR-LF, so we know that c is a single scalar
260260
builder.buildMatchScalarCaseInsensitive(c.unicodeScalars.last!, boundaryCheck: true)
261261
} else {
262-
builder.buildMatchCaseInsensitive(c)
262+
builder.buildMatch(c, isCaseInsensitive: true)
263263
}
264264
return
265265
}
@@ -272,7 +272,7 @@ fileprivate extension Compiler.ByteCodeGen {
272272
return
273273
}
274274

275-
builder.buildMatch(c)
275+
builder.buildMatch(c, isCaseInsensitive: false)
276276
}
277277

278278
mutating func emitAny() {

Sources/_StringProcessing/Engine/InstPayload.swift

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,19 @@ extension Instruction.Payload {
155155
return Unicode.Scalar(_value: UInt32(self.rawValue))
156156
}
157157

158+
init(scalar: Unicode.Scalar, caseInsensitive: Bool, boundaryCheck: Bool) {
159+
let raw = UInt64(scalar.value)
160+
+ (caseInsensitive ? 1 << 55: 0)
161+
+ (boundaryCheck ? 1 << 54 : 0)
162+
self.init(raw)
163+
}
164+
var scalarPayload: (Unicode.Scalar, caseInsensitive: Bool, boundaryCheck: Bool) {
165+
let caseInsensitive = (self.rawValue >> 55) & 1 == 1
166+
let boundaryCheck = (self.rawValue >> 54) & 1 == 1
167+
let scalar = Unicode.Scalar(_value: UInt32(self.rawValue & 0xFFFF_FFFF))
168+
return (scalar, caseInsensitive: caseInsensitive, boundaryCheck: boundaryCheck)
169+
}
170+
158171
init(sequence: SequenceRegister) {
159172
self.init(sequence)
160173
}
@@ -197,18 +210,20 @@ extension Instruction.Payload {
197210
interpret()
198211
}
199212

200-
init(element: ElementRegister) {
201-
self.init(element)
213+
init(element: ElementRegister, isCaseInsensitive: Bool) {
214+
self.init(isCaseInsensitive ? 1 : 0, element)
202215
}
203-
var element: ElementRegister {
204-
interpret()
216+
var elementPayload: (isCaseInsensitive: Bool, ElementRegister) {
217+
let pair: (UInt64, ElementRegister) = interpretPair()
218+
return (isCaseInsensitive: pair.0 == 1, pair.1)
205219
}
206220

207-
init(bitset: AsciiBitsetRegister) {
208-
self.init(bitset)
221+
init(bitset: AsciiBitsetRegister, isScalar: Bool) {
222+
self.init(isScalar ? 1 : 0, bitset)
209223
}
210-
var bitset: AsciiBitsetRegister {
211-
interpret()
224+
var bitsetPayload: (isScalar: Bool, AsciiBitsetRegister) {
225+
let pair: (UInt64, AsciiBitsetRegister) = interpretPair()
226+
return (isScalar: pair.0 == 1, pair.1)
212227
}
213228

214229
init(consumer: ConsumeFunctionRegister) {

Sources/_StringProcessing/Engine/Instruction.swift

Lines changed: 13 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -72,52 +72,28 @@ extension Instruction {
7272

7373
/// Composite assert-advance else restore.
7474
///
75-
/// match(_: EltReg)
75+
/// match(_: EltReg, isCaseInsensitive: Bool)
7676
///
77-
/// Operand: Element register to compare against.
77+
/// Operands:
78+
/// - Element register to compare against.
79+
/// - Boolean indicating whether to match case-insensitively.
7880
case match
7981

80-
/// Matches the given character case insensitively
81-
///
82-
/// match(_: EltReg)
83-
///
84-
/// Operand: Element register to compare against.
85-
case matchCaseInsensitive
86-
87-
/// Match against a sequence of elements
82+
/// Match against a scalar, optionally performing a grapheme boundary check and/or matching case-insensitively
8883
///
89-
/// matchSequence(_: SeqReg)
84+
/// matchScalar(_: Unicode.Scalar, isCaseInsensitive: Bool, boundaryCheck: Bool)
9085
///
91-
/// Operand: Sequence register to compare against.
92-
case matchSequence
93-
94-
/// Match against a scalar and perform a grapheme boundary check
95-
///
96-
/// matchScalar(_: Unicode.Scalar)
97-
/// Operand: Scalar value to match against
86+
/// Operands: Scalar value to match against, plus the `isCaseInsensitive` and `boundaryCheck` booleans
9887
case matchScalar
99-
/// Match against a scalar and do NOT perform a grapheme boundary check
100-
///
101-
/// matchScalarUnchecked(_: Unicode.Scalar)
102-
/// Operand: Scalar value to match against
103-
case matchScalarUnchecked
10488

105-
/// Match against a scalar case insensitively and perform a grapheme boundary check
89+
/// Match a character or a scalar against a set of valid ASCII values stored in a bitset
10690
///
107-
/// matchScalarCaseInsensitive(_: Unicode.Scalar)
108-
/// Operand: Scalar value to match against
109-
case matchScalarCaseInsensitive
110-
/// Match against a scalar case insensitively and do NOT perform a grapheme boundary check
91+
/// matchBitset(_: AsciiBitsetRegister, isScalar: Bool)
11192
///
112-
/// matchScalarCaseInsensitiveUnchecked(_: Unicode.Scalar)
113-
/// Operand: Scalar value to match against
114-
case matchScalarCaseInsensitiveUnchecked
115-
116-
/// Match against a set of valid ascii values stored in a bitset
117-
/// Operand: Ascii bitset register containing the bitset
93+
/// Operands:
94+
/// - Ascii bitset register containing the bitset
95+
/// - Boolean indicating whether to match by scalar value
11896
case matchBitset
119-
/// matchBitset but emitted in unicode scalar semantic mode, matches and advances a single scalar
120-
case matchBitsetScalar
12197

12298
/// TODO: builtin assertions and anchors
12399
case builtinAssertion
@@ -337,7 +313,7 @@ extension Instruction {
337313
var elementRegister: ElementRegister? {
338314
switch opcode {
339315
case .match:
340-
return payload.element
316+
return payload.elementPayload.1
341317
default: return nil
342318
}
343319
}

Sources/_StringProcessing/Engine/MEBuilder.swift

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -135,53 +135,32 @@ extension MEProgram.Builder {
135135
instructions.append(.init(.advance, .init(distance: n)))
136136
}
137137

138-
mutating func buildMatch(_ e: Character) {
138+
mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) {
139139
instructions.append(.init(
140-
.match, .init(element: elements.store(e))))
140+
.match, .init(element: elements.store(e), isCaseInsensitive: isCaseInsensitive)))
141141
}
142142

143-
mutating func buildMatchCaseInsensitive(_ e: Character) {
144-
instructions.append(.init(
145-
.matchCaseInsensitive, .init(element: elements.store(e))))
146-
}
147-
148-
mutating func buildMatchSequence<S: Sequence>(
149-
_ s: S
150-
) where S.Element == Character {
151-
instructions.append(.init(
152-
.matchSequence,
153-
.init(sequence: sequences.store(.init(s)))))
154-
}
155-
156143
mutating func buildMatchScalar(_ s: Unicode.Scalar, boundaryCheck: Bool) {
157-
if boundaryCheck {
158-
instructions.append(.init(.matchScalar, .init(scalar: s)))
159-
} else {
160-
instructions.append(.init(.matchScalarUnchecked, .init(scalar: s)))
161-
}
144+
instructions.append(.init(.matchScalar, .init(scalar: s, caseInsensitive: false, boundaryCheck: boundaryCheck)))
162145
}
163146

164147
mutating func buildMatchScalarCaseInsensitive(_ s: Unicode.Scalar, boundaryCheck: Bool) {
165-
if boundaryCheck {
166-
instructions.append(.init(.matchScalarCaseInsensitive, .init(scalar: s)))
167-
} else {
168-
instructions.append(.init(.matchScalarCaseInsensitiveUnchecked, .init(scalar: s)))
169-
}
148+
instructions.append(.init(.matchScalar, .init(scalar: s, caseInsensitive: true, boundaryCheck: boundaryCheck)))
170149
}
171150

172151

173152
mutating func buildMatchAsciiBitset(
174153
_ b: DSLTree.CustomCharacterClass.AsciiBitset
175154
) {
176155
instructions.append(.init(
177-
.matchBitset, .init(bitset: makeAsciiBitset(b))))
156+
.matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: false)))
178157
}
179158

180159
mutating func buildScalarMatchAsciiBitset(
181160
_ b: DSLTree.CustomCharacterClass.AsciiBitset
182161
) {
183162
instructions.append(.init(
184-
.matchBitsetScalar, .init(bitset: makeAsciiBitset(b))))
163+
.matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: true)))
185164
}
186165

187166
mutating func buildConsume(

Sources/_StringProcessing/Engine/Processor.swift

Lines changed: 33 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -427,55 +427,46 @@ extension Processor {
427427
}
428428

429429
case .match:
430-
let reg = payload.element
431-
if match(registers[reg]) {
432-
controller.step()
433-
}
434-
case .matchCaseInsensitive:
435-
let reg = payload.element
436-
if matchCaseInsensitive(registers[reg]) {
437-
controller.step()
438-
}
439-
440-
case .matchSequence:
441-
let reg = payload.sequence
442-
let seq = registers[reg]
443-
if matchSeq(seq) {
444-
controller.step()
430+
let (isCaseInsensitive, reg) = payload.elementPayload
431+
if isCaseInsensitive {
432+
if matchCaseInsensitive(registers[reg]) {
433+
controller.step()
434+
}
435+
} else {
436+
if match(registers[reg]) {
437+
controller.step()
438+
}
445439
}
440+
// case .matchSequence:
441+
// let reg = payload.sequence
442+
// let seq = registers[reg]
443+
// if matchSeq(seq) {
444+
// controller.step()
445+
// }
446446

447447
case .matchScalar:
448-
let scalar = payload.scalar
449-
if matchScalar(scalar, boundaryCheck: true) {
450-
controller.step()
451-
}
452-
case .matchScalarUnchecked:
453-
let scalar = payload.scalar
454-
if matchScalar(scalar, boundaryCheck: false) {
455-
controller.step()
456-
}
457-
case .matchScalarCaseInsensitive:
458-
let scalar = payload.scalar
459-
if matchScalarCaseInsensitive(scalar, boundaryCheck: true) {
460-
controller.step()
461-
}
462-
case .matchScalarCaseInsensitiveUnchecked:
463-
let scalar = payload.scalar
464-
if matchScalarCaseInsensitive(scalar, boundaryCheck: false) {
465-
controller.step()
448+
let (scalar, caseInsensitive, boundaryCheck) = payload.scalarPayload
449+
if caseInsensitive {
450+
if matchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck) {
451+
controller.step()
452+
}
453+
} else {
454+
if matchScalar(scalar, boundaryCheck: boundaryCheck) {
455+
controller.step()
456+
}
466457
}
467458

468459
case .matchBitset:
469-
let reg = payload.bitset
460+
let (isScalar, reg) = payload.bitsetPayload
470461
let bitset = registers[reg]
471-
if matchBitset(bitset) {
472-
controller.step()
473-
}
474-
case .matchBitsetScalar:
475-
let reg = payload.bitset
476-
let bitset = registers[reg]
477-
if matchBitsetScalar(bitset) {
478-
controller.step()
462+
if isScalar {
463+
if matchBitsetScalar(bitset) {
464+
controller.step()
465+
}
466+
} else {
467+
if matchBitset(bitset) {
468+
controller.step()
469+
}
479470
}
480471

481472
case .consumeBy:

0 commit comments

Comments
 (0)