Skip to content

Commit 9a06a3e

Browse files
committed
Generalized bidirectional assertion support
Adds generalized assertion support as well as support for most built-in assertions and anchors.
1 parent 0e25188 commit 9a06a3e

File tree

12 files changed

+301
-6
lines changed

12 files changed

+301
-6
lines changed

Sources/_MatchingEngine/Engine/Builder.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ extension Program where Input.Element: Hashable {
66
var sequences = TypedSetVector<[Input.Element], _SequenceRegister>()
77
var strings = TypedSetVector<String, _StringRegister>()
88
var consumeFunctions: [ConsumeFunction] = []
9+
var assertionFunctions: [AssertionFunction] = []
910

1011
// Map tokens to actual addresses
1112
var addressTokens: [InstructionAddress?] = []
@@ -173,6 +174,13 @@ extension Program.Builder {
173174
.consumeBy, .init(consumer: makeConsumeFunction(p))))
174175
}
175176

177+
/// Emits an `assertBy` instruction that invokes a custom assertion function.
///
/// The function is first stored via `makeAssertionFunction(_:)`; the register
/// it returns becomes the instruction's payload.
///
/// - Parameter p: The assertion function to register and call.
public mutating func buildAssert(
  by p: @escaping Program.AssertionFunction
) {
  // Allocate the register first, then emit the instruction that refers to it.
  let register = makeAssertionFunction(p)
  instructions.append(.init(.assertBy, .init(assertion: register)))
}
183+
176184
public mutating func buildAssert(
177185
_ e: Input.Element, into cond: BoolRegister
178186
) {
@@ -243,13 +251,15 @@ extension Program.Builder {
243251
regInfo.ints = nextIntRegister.rawValue
244252
regInfo.positions = nextPositionRegister.rawValue
245253
regInfo.consumeFunctions = consumeFunctions.count
254+
regInfo.assertionFunctions = assertionFunctions.count
246255

247256
return Program(
248257
instructions: InstructionList(instructions),
249258
staticElements: elements.stored,
250259
staticSequences: sequences.stored,
251260
staticStrings: strings.stored,
252261
staticConsumeFunctions: consumeFunctions,
262+
staticAssertionFunctions: assertionFunctions,
253263
registerInfo: regInfo)
254264
}
255265

@@ -377,5 +387,11 @@ extension Program.Builder {
377387
defer { consumeFunctions.append(f) }
378388
return ConsumeFunctionRegister(consumeFunctions.count)
379389
}
390+
/// Stores `f` in the builder's list of assertion functions and returns the
/// register that refers to it.
///
/// The register's value is the function's index in `assertionFunctions`,
/// i.e. the number of functions stored before this call.
///
/// - Parameter f: The assertion function to store.
/// - Returns: The register identifying the newly stored function.
public mutating func makeAssertionFunction(
  _ f: @escaping Program.AssertionFunction
) -> AssertionFunctionRegister {
  // Capture the index before appending so it names the slot `f` will occupy.
  let register = AssertionFunctionRegister(assertionFunctions.count)
  assertionFunctions.append(f)
  return register
}
380396
}
381397

Sources/_MatchingEngine/Engine/Engine.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// Currently, engine binds the type and consume binds an instance.
22
// But, we can play around with this.
3-
public struct Engine<Input: Collection> where Input.Element: Hashable {
3+
public struct Engine<Input: BidirectionalCollection> where Input.Element: Hashable {
44

55
var program: Program<Input>
66

Sources/_MatchingEngine/Engine/InstPayload.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ extension Instruction.Payload {
3939
case bool(BoolRegister)
4040
case element(ElementRegister)
4141
case consumer(ConsumeFunctionRegister)
42+
case assertion(AssertionFunctionRegister)
4243
case addr(InstructionAddress)
4344

4445
case packedImmInt(Int, IntRegister)
@@ -186,6 +187,13 @@ extension Instruction.Payload {
186187
interpret()
187188
}
188189

190+
/// Creates a payload holding an assertion function register.
init(assertion: AssertionFunctionRegister) {
  self.init(assertion)
}

/// The payload viewed as an assertion function register (via `interpret()`).
var assertion: AssertionFunctionRegister {
  return interpret()
}
196+
189197
init(addr: InstructionAddress) {
190198
self.init(addr)
191199
}

Sources/_MatchingEngine/Engine/Instruction.swift

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,11 +162,18 @@ extension Instruction {
162162
/// Operand: Consume function register to call.
163163
case consumeBy
164164

165-
/// Custom assertion operation
165+
/// Custom lookaround assertion operation.
166+
/// Triggers a failure if customFunction returns false.
167+
///
168+
/// assert(
169+
/// _ customFunction: (
170+
/// input: Input,
171+
/// currentPos: Position,
172+
/// bounds: Range<Position>) -> Bool
173+
/// )
166174
///
167175
/// Operand: Assertion function register to call.
168-
static var assertHook: OpCode { fatalError() }
169-
176+
case assertBy
170177

171178
// MARK: Matching: Save points
172179

Sources/_MatchingEngine/Engine/Processor.swift

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ struct Controller {
1414
}
1515

1616
struct Processor<
17-
Input: Collection
17+
Input: BidirectionalCollection
1818
> where Input.Element: Equatable { // maybe Hashable?
1919
typealias Element = Input.Element
2020

@@ -347,6 +347,15 @@ extension Processor {
347347
advance(to: nextIndex)
348348
controller.step()
349349

350+
case .assertBy:
351+
let reg = payload.assertion
352+
let assertion = registers[reg]
353+
guard assertion(input, currentPosition, bounds) else {
354+
signalFailure()
355+
return
356+
}
357+
controller.step()
358+
350359
case .print:
351360
// TODO: Debug stream
352361
doPrint(registers[payload.string])

Sources/_MatchingEngine/Engine/Program.swift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
public struct Program<Input: Collection> where Input.Element: Equatable {
22
public typealias ConsumeFunction = (Input, Range<Input.Index>) -> Input.Index?
3+
public typealias AssertionFunction =
4+
(Input, Input.Index, Range<Input.Index>) -> Bool
35
var instructions: InstructionList<Instruction>
46

57
var staticElements: [Input.Element]
68
var staticSequences: [[Input.Element]]
79
var staticStrings: [String]
810
var staticConsumeFunctions: [ConsumeFunction]
11+
var staticAssertionFunctions: [AssertionFunction]
912

1013
var registerInfo: RegisterInfo
1114

Sources/_MatchingEngine/Engine/Registers.swift

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ extension Processor {
1515
// currently, these are static readonly
1616
var consumeFunctions: [Program<Input>.ConsumeFunction]
1717

18+
// currently, these are static readonly
19+
var assertionFunctions: [Program<Input>.AssertionFunction]
20+
1821
// currently, these are for comments and abort messages
1922
var strings: [String]
2023

@@ -63,6 +66,9 @@ extension Processor {
6366
subscript(_ i: ConsumeFunctionRegister) -> Program<Input>.ConsumeFunction {
6467
consumeFunctions[i.rawValue]
6568
}
69+
/// Looks up the assertion function stored in register `i`.
subscript(_ i: AssertionFunctionRegister) -> Program<Input>.AssertionFunction {
  let slot = i.rawValue
  return assertionFunctions[slot]
}
6672
}
6773
}
6874

@@ -82,6 +88,9 @@ extension Processor.Registers {
8288
self.consumeFunctions = program.staticConsumeFunctions
8389
assert(consumeFunctions.count == info.consumeFunctions)
8490

91+
self.assertionFunctions = program.staticAssertionFunctions
92+
assert(assertionFunctions.count == info.assertionFunctions)
93+
8594
self.strings = program.staticStrings
8695
assert(strings.count == info.strings)
8796

@@ -110,6 +119,7 @@ extension Program {
110119
var bools = 0
111120
var strings = 0
112121
var consumeFunctions = 0
122+
var assertionFunctions = 0
113123
var ints = 0
114124
var floats = 0
115125
var positions = 0

Sources/_MatchingEngine/Regex/AST/Atom.swift

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,3 +441,65 @@ extension AST.Atom {
441441
}
442442
}
443443
}
444+
445+
extension AST.Atom {
  /// Anchors and other built-in zero-width assertions.
  ///
  /// Each case's raw value is the spelling of the assertion in regex syntax.
  public enum AssertionKind: String {
    /// \A
    case startOfSubject = #"\A"#

    /// \Z
    case endOfSubjectBeforeNewline = #"\Z"#

    /// \z
    case endOfSubject = #"\z"#

    /// \K
    case resetStartOfMatch = #"\K"#

    /// \G
    case firstMatchingPositionInSubject = #"\G"#

    /// \y
    case textSegment = #"\y"#

    /// \Y
    case notTextSegment = #"\Y"#

    /// ^
    case startOfLine = #"^"#

    /// $
    case endOfLine = #"$"#

    /// \b (from outside a custom character class)
    case wordBoundary = #"\b"#

    /// \B
    case notWordBoundary = #"\B"#
  }

  /// The assertion this atom represents, or `nil` if the atom is not one of
  /// the built-in assertions listed in `AssertionKind`.
  public var assertionKind: AssertionKind? {
    switch kind {
    // Unescaped line anchors.
    case .startOfLine:
      return .startOfLine
    case .endOfLine:
      return .endOfLine

    // Escaped builtins: only the assertion-like ones map to a kind.
    case .escaped(let builtin):
      switch builtin {
      case .wordBoundary:
        return .wordBoundary
      case .notWordBoundary:
        return .notWordBoundary
      case .startOfSubject:
        return .startOfSubject
      case .endOfSubject:
        return .endOfSubject
      case .textSegment:
        return .textSegment
      case .notTextSegment:
        return .notTextSegment
      case .endOfSubjectBeforeNewline:
        return .endOfSubjectBeforeNewline
      case .firstMatchingPositionInSubject:
        return .firstMatchingPositionInSubject
      case .resetStartOfMatch:
        return .resetStartOfMatch
      default:
        return nil
      }

    default:
      return nil
    }
  }
}
505+

Sources/_MatchingEngine/Utility/TypedInt.swift

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,10 @@ public enum _StringRegister {}
144144
public typealias ConsumeFunctionRegister = TypedInt<_ConsumeFunctionRegister>
145145
public enum _ConsumeFunctionRegister {}
146146

147+
/// Used for assertion functions, e.g. anchors
148+
public typealias AssertionFunctionRegister = TypedInt<_AssertionFunctionRegister>
149+
public enum _AssertionFunctionRegister {}
150+
147151
/// UNIMPLEMENTED
148152
public typealias IntRegister = TypedInt<_IntRegister>
149153
public enum _IntRegister {}

Sources/_StringProcessing/CharacterClass.swift

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,3 +421,31 @@ extension AST.CustomCharacterClass {
421421
return self.isInverted ? cc.inverted : cc
422422
}
423423
}
424+
425+
extension CharacterClass {
  /// Reports whether `pos` sits on a boundary of this character class in
  /// `input`.
  ///
  /// - At the start of `input`, this is whether the class matches at `pos`.
  /// - At the end of `input`, this is whether the class matches at the
  ///   position just before `pos`.
  /// - Elsewhere, this is whether exactly one of the two positions (just
  ///   before `pos`, and `pos` itself) matches.
  ///
  /// An empty `input` never has a boundary.
  ///
  /// FIXME: Calling on inverted sets won't be the same as the
  /// inverse of a boundary if at the start or end of the
  /// string. (Think through what we want: do it ourselves or
  /// give the caller both options).
  ///
  /// - Parameters:
  ///   - input: The string being examined.
  ///   - pos: The position to test.
  ///   - bounds: Currently unused; see the FIXME in the body.
  /// - Returns: `true` if `pos` is a boundary for this class.
  func isBoundary(
    _ input: String,
    at pos: String.Index,
    bounds: Range<String.Index>
  ) -> Bool {
    // FIXME: How should we handle bounds?
    // We probably need two concepts
    guard !input.isEmpty else { return false }

    // Whether this class matches the element at `idx`.
    func isMember(at idx: String.Index) -> Bool {
      return matches(in: input, at: idx) != nil
    }

    // Nothing precedes the start, so only the current element counts.
    if pos == input.startIndex {
      return isMember(at: pos)
    }

    let before = input.index(before: pos)

    // Nothing follows the end, so only the preceding element counts.
    if pos == input.endIndex {
      return isMember(at: before)
    }

    // Interior position: a boundary is a change in membership across `pos`.
    return isMember(at: before) != isMember(at: pos)
  }
}

0 commit comments

Comments
 (0)