diff --git a/Sources/DSLParser/DSLParser.swift b/Sources/DSLParser/DSLParser.swift deleted file mode 100644 index 9f73907..0000000 --- a/Sources/DSLParser/DSLParser.swift +++ /dev/null @@ -1,32 +0,0 @@ -import Foundation -@testable import _RegexParser - -struct DSLParser { - func parse(_ pattern: String) throws -> [Token] { - let ast = try _RegexParser.parse(pattern, .traditional) - - var printer = PrettyPrinter() - printer.printAsPattern(ast) - _ = printer.finish() - - return printer.locationMappings.map { (sourceLocation, patternLocation) in - Token( - sourceLocation: Location(start: sourceLocation.start, end: sourceLocation.end), - patternLocation: Location( - start: patternLocation.start.utf16Offset(in: pattern), - end: patternLocation.end.utf16Offset(in: pattern) - ) - ) - } - } -} - -struct Token: Codable { - let sourceLocation: Location - let patternLocation: Location -} - -struct Location: Codable { - let start: Int - let end: Int -} diff --git a/Sources/DSLParser/Main.swift b/Sources/DSLParser/Main.swift deleted file mode 100644 index f25af3f..0000000 --- a/Sources/DSLParser/Main.swift +++ /dev/null @@ -1,27 +0,0 @@ -import Foundation - -@main -struct Main { - static func main() throws { - do { - let pattern = CommandLine.arguments[1] - - let parser = DSLParser() - let tokens = try parser.parse(pattern) - - let data = try JSONEncoder().encode(tokens) - print(String(data: data, encoding: .utf8) ?? "") - } catch { - print("\(error)", to:&standardError) - } - } -} - -var standardError = FileHandle.standardError - -extension FileHandle : TextOutputStream { - public func write(_ string: String) { - guard let data = string.data(using: .utf8) else { return } - self.write(data) - } -} diff --git a/Sources/DSLParser/PrettyPrinter.swift b/Sources/DSLParser/PrettyPrinter.swift deleted file mode 100644 index 16b08fc..0000000 --- a/Sources/DSLParser/PrettyPrinter.swift +++ /dev/null @@ -1,150 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -@testable import _RegexParser - -/// State used when to pretty-printing regex ASTs. -public struct PrettyPrinter { - // Configuration - - /// The maximum number number of levels, from the root of the tree, - /// at which to perform pattern conversion. - /// - /// A `nil` value indicates that there is no maximum, - /// and pattern conversion always takes place. - public var maxTopDownLevels: Int? - - /// The maximum number number of levels, from the leaf nodes of the tree, - /// at which to perform pattern conversion. - /// - /// A `nil` value indicates that there is no maximum, - /// and pattern conversion always takes place. - public var minBottomUpLevels: Int? - - /// The number of spaces used for indentation. - public var indentWidth = 2 - - // Internal state - - // The output string we're building up - var result = "" - - var currentASTNode: AST.Node! - var locationMappings = [((start: Int, end: Int), SourceLocation)]() - - // Whether next print needs to indent - fileprivate var startOfLine = true - - // The indentation level - fileprivate var indentLevel = 0 - - // The current default quantification behavior - public var quantificationBehavior: AST.Quantification.Kind = .eager -} - -// MARK: - Raw interface -extension PrettyPrinter { - // This might be necessary if `fileprivate` above suppresses - // default struct inits. - public init( - maxTopDownLevels: Int? = nil, - minBottomUpLevels: Int? = nil - ) { - self.maxTopDownLevels = maxTopDownLevels - self.minBottomUpLevels = minBottomUpLevels - } - - /// Outputs a string directly, without termination or - /// indentation, and without updating any internal state. - /// - /// This is the low-level interface to the pretty printer. - /// - /// - Note: If `s` includes a newline, even at the end, - /// this method does not update any tracking state. - public mutating func output(_ s: String) { - result += s - } - - /// Terminates a line, updating any relevant state. - public mutating func terminateLine() { - output("\n") - startOfLine = true - } - - /// Indents a new line, if at the start of a line, otherwise - /// does nothing. - /// - /// This function updates internal state. - public mutating func indent() { - guard startOfLine else { return } - let numCols = indentLevel * indentWidth - output(String(repeating: " ", count: numCols)) - startOfLine = false - } - - // Finish, flush, and clear. - // - // - Returns: The rendered output. - public mutating func finish() -> String { - defer { result = "" } - return result - } - - public var depth: Int { indentLevel } -} - -// MARK: - Pretty-print interface -extension PrettyPrinter { - /// Print out a new entry. - /// - /// This method indents `s`, updates any internal state, - /// and terminates the current line. - public mutating func print(_ s: String) { - indent() - output("\(s)") - terminateLine() - } - - /// Prints out a new entry by invoking `f` until it returns `nil`. - /// - /// This method indents `s`, updates any internal state, - /// and terminates the current line. - public mutating func printLine(_ f: () -> String?) { - // TODO: What should we do if `f` never returns non-nil? - indent() - while let s = f() { - output(s) - } - terminateLine() - } - - /// Executes `f` at one increased level of indentation. - public mutating func printIndented( - _ f: (inout Self) -> () - ) { - self.indentLevel += 1 - f(&self) - self.indentLevel -= 1 - } - - /// Executes `f` inside an indented block, which has a header - /// and delimiters. - public mutating func printBlock( - _ header: String, - startDelimiter: String = "{", - endDelimiter: String = "}", - _ f: (inout Self) -> () - ) { - print("\(header) \(startDelimiter)") - printIndented(f) - print(endDelimiter) - } -} diff --git a/Sources/DSLParser/PrintAsCanonical.swift b/Sources/DSLParser/PrintAsCanonical.swift deleted file mode 100644 index 3073712..0000000 --- a/Sources/DSLParser/PrintAsCanonical.swift +++ /dev/null @@ -1,343 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -@testable import _RegexParser - -// TODO: Round-tripping tests - -extension AST { - /// Renders using Swift's preferred regex literal syntax. - public func renderAsCanonical( - showDelimiters delimiters: Bool = false, - terminateLine: Bool = false - ) -> String { - var printer = PrettyPrinter() - printer.printAsCanonical( - self, - delimiters: delimiters, - terminateLine: terminateLine) - return printer.finish() - } -} - -extension AST.Node { - /// Renders using Swift's preferred regex literal syntax. - public func renderAsCanonical( - showDelimiters delimiters: Bool = false, - terminateLine: Bool = false - ) -> String { - AST(self, globalOptions: nil, diags: Diagnostics()).renderAsCanonical( - showDelimiters: delimiters, terminateLine: terminateLine) - } -} - -extension PrettyPrinter { - /// Outputs a regular expression abstract syntax tree in canonical form, - /// indenting and terminating the line, and updating its internal state. - /// - /// - Parameter ast: The abstract syntax tree of the regular expression being output. - /// - Parameter delimiters: Whether to include commas between items. - /// - Parameter terminateLine: Whether to include terminate the line. - public mutating func printAsCanonical( - _ ast: AST, - delimiters: Bool = false, - terminateLine terminate: Bool = true - ) { - indent() - if delimiters { output("'/") } - if let opts = ast.globalOptions { - outputAsCanonical(opts) - } - outputAsCanonical(ast.root) - if delimiters { output("/'") } - if terminate { - terminateLine() - } - } - - /// Outputs a regular expression abstract syntax tree in canonical form, - /// without indentation, line termation, or affecting its internal state. - mutating func outputAsCanonical(_ ast: AST.Node) { - switch ast { - case let .alternation(a): - for idx in a.children.indices { - outputAsCanonical(a.children[idx]) - if a.children.index(after: idx) != a.children.endIndex { - output("|") - } - } - case let .concatenation(c): - c.children.forEach { outputAsCanonical($0) } - case let .group(g): - output(g.kind.value._canonicalBase) - outputAsCanonical(g.child) - output(")") - - case let .conditional(c): - output("(") - outputAsCanonical(c.condition) - outputAsCanonical(c.trueBranch) - output("|") - outputAsCanonical(c.falseBranch) - - case let .quantification(q): - outputAsCanonical(q.child) - output(q.amount.value._canonicalBase) - output(q.kind.value._canonicalBase) - - case let .quote(q): - output(q._canonicalBase) - - case let .trivia(t): - output(t._canonicalBase) - - case let .interpolation(i): - output(i._canonicalBase) - - case let .atom(a): - output(a._canonicalBase) - - case let .customCharacterClass(ccc): - outputAsCanonical(ccc) - - case let .absentFunction(abs): - outputAsCanonical(abs) - - case .empty: - output("") - } - } - - mutating func outputAsCanonical( - _ ccc: AST.CustomCharacterClass - ) { - output(ccc.start.value._canonicalBase) - ccc.members.forEach { outputAsCanonical($0) } - output("]") - } - - mutating func outputAsCanonical( - _ member: AST.CustomCharacterClass.Member - ) { - // TODO: Do we need grouping or special escape rules? - switch member { - case .custom(let ccc): - outputAsCanonical(ccc) - case .range(let r): - output(r.lhs._canonicalBase) - output("-") - output(r.rhs._canonicalBase) - case .atom(let a): - output(a._canonicalBase) - case .quote(let q): - output(q._canonicalBase) - case .trivia(let t): - output(t._canonicalBase) - case .setOperation: - output("/* TODO: set operation \(self) */") - } - } - - mutating func outputAsCanonical(_ condition: AST.Conditional.Condition) { - output("(/*TODO: conditional \(condition) */)") - } - - mutating func outputAsCanonical(_ abs: AST.AbsentFunction) { - output("(?~") - switch abs.kind { - case .repeater(let a): - outputAsCanonical(a) - case .expression(let a, _, let child): - output("|") - outputAsCanonical(a) - output("|") - outputAsCanonical(child) - case .stopper(let a): - output("|") - outputAsCanonical(a) - case .clearer: - output("|") - } - output(")") - } - - mutating func outputAsCanonical(_ opts: AST.GlobalMatchingOptionSequence) { - for opt in opts.options { - output(opt._canonicalBase) - } - } -} - -extension AST.Quote { - var _canonicalBase: String { - // TODO: Is this really what we want? - "\\Q\(literal)\\E" - } -} - -extension AST.Interpolation { - var _canonicalBase: String { - "<{\(contents)}>" - } -} - -extension AST.Group.Kind { - var _canonicalBase: String { - switch self { - case .capture: return "(" - case .namedCapture(let n): return "(?<\(n.value)>" - case .balancedCapture(let b): return "(?<\(b._canonicalBase)>" - case .nonCapture: return "(?:" - case .nonCaptureReset: return "(?|" - case .atomicNonCapturing: return "(?>" - case .lookahead: return "(?=" - case .negativeLookahead: return "(?!" - case .nonAtomicLookahead: return "(?*" - case .lookbehind: return "(?<=" - case .negativeLookbehind: return "(?" - } -} - -extension AST.Atom { - var _canonicalBase: String { - if let lit = self.literalStringValue { - // FIXME: We may have to re-introduce escapes - // For example, `\.` will come back as "." instead - // For now, catch the most common offender - if lit == "." { return "\\." } - return lit - } - switch self.kind { - case .caretAnchor: - return "^" - case .dollarAnchor: - return "$" - case .escaped(let e): - return "\\\(e.character)" - case .backreference(let br): - return br._canonicalBase - - default: - return "/* TODO: atom \(self) */" - } - } -} - -extension AST.Reference { - var _canonicalBase: String { - if self.recursesWholePattern { - return "(?R)" - } - switch kind { - case .absolute(let i): - // TODO: Which should we prefer, this or `\g{n}`? - return "\\\(i)" - case .relative: - return "/* TODO: relative reference \(self) */" - case .named: - return "/* TODO: named reference \(self) */" - } - } -} - -extension AST.CustomCharacterClass.Start { - var _canonicalBase: String { self.rawValue } -} - -extension AST.Group.BalancedCapture { - var _canonicalBase: String { - "\(name?.value ?? "")-\(priorName.value)" - } -} - -extension AST.GlobalMatchingOption.NewlineMatching { - var _canonicalBase: String { - switch self { - case .carriageReturnOnly: return "CR" - case .linefeedOnly: return "LF" - case .carriageAndLinefeedOnly: return "CRLF" - case .anyCarriageReturnOrLinefeed: return "ANYCRLF" - case .anyUnicode: return "ANY" - case .nulCharacter: return "NUL" - } - } -} - -extension AST.GlobalMatchingOption.NewlineSequenceMatching { - var _canonicalBase: String { - switch self { - case .anyCarriageReturnOrLinefeed: return "BSR_ANYCRLF" - case .anyUnicode: return "BSR_UNICODE" - } - } -} - -extension AST.GlobalMatchingOption.Kind { - var _canonicalBase: String { - switch self { - case .limitDepth(let i): return "LIMIT_DEPTH=\(i._canonicalBase)" - case .limitHeap(let i): return "LIMIT_HEAP=\(i._canonicalBase)" - case .limitMatch(let i): return "LIMIT_MATCH=\(i._canonicalBase)" - case .notEmpty: return "NOTEMPTY" - case .notEmptyAtStart: return "NOTEMPTY_ATSTART" - case .noAutoPossess: return "NO_AUTO_POSSESS" - case .noDotStarAnchor: return "NO_DOTSTAR_ANCHOR" - case .noJIT: return "NO_JIT" - case .noStartOpt: return "NO_START_OPT" - case .utfMode: return "UTF" - case .unicodeProperties: return "UCP" - case .newlineMatching(let m): return m._canonicalBase - case .newlineSequenceMatching(let m): return m._canonicalBase - } - } -} - -extension AST.GlobalMatchingOption { - var _canonicalBase: String { "(*\(kind._canonicalBase))"} -} - -extension AST.Trivia { - var _canonicalBase: String { - // TODO: We might want to output comments... - "" - } -} diff --git a/Sources/DSLParser/PrintAsPattern.swift b/Sources/DSLParser/PrintAsPattern.swift deleted file mode 100644 index 1b1e695..0000000 --- a/Sources/DSLParser/PrintAsPattern.swift +++ /dev/null @@ -1,1402 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -@testable import _RegexParser -@testable @_spi(RegexBuilder) import _StringProcessing - -// TODO: Add an expansion level, both from top to bottom. -// After `printAsCanonical` is fleshed out, these two -// printers can call each other. This would enable -// incremental conversion, such that leaves remain -// as canonical regex literals. - -/// Renders an AST tree as a Pattern DSL. -/// -/// - Parameters: -/// - ast: A `_RegexParser.AST` instance. -/// - maxTopDownLevels: The number of levels down from the root of the tree -/// to perform conversion. `nil` means no limit. -/// - minBottomUpLevels: The number of levels up from the leaves of the tree -/// to perform conversion. `nil` means no limit. -/// - Returns: A string representation of `ast` in the `RegexBuilder` syntax. -@_spi(PatternConverter) -public func renderAsBuilderDSL( - ast: Any, - maxTopDownLevels: Int? = nil, - minBottomUpLevels: Int? = nil -) -> String { - var printer = PrettyPrinter( - maxTopDownLevels: maxTopDownLevels, - minBottomUpLevels: minBottomUpLevels) - printer.printAsPattern(ast as! AST) - return printer.finish() -} - -extension PrettyPrinter { - /// If pattern printing should back off, prints the regex literal and returns true - mutating func patternBackoff( - _ ast: T - ) -> Bool { - if let max = maxTopDownLevels, depth >= max { - return true - } - if let min = minBottomUpLevels, ast.height <= min { - return true - } - return false - } - - mutating func printBackoff(_ node: DSLTree.Node) { - precondition(node.astNode != nil, "unconverted node") - printAsCanonical( - .init(node.astNode!, globalOptions: nil, diags: Diagnostics()), - delimiters: true) - } - - mutating func printAsPattern(_ ast: AST) { - // TODO: Handle global options... - let node = ast.root.dslTreeNode - - // If we have any named captures, create references to those above the regex. - let namedCaptures = node.getNamedCaptures() - - for namedCapture in namedCaptures { - print("let \(namedCapture) = Reference(Substring.self)") - } - - let currentASTNode = self.currentASTNode! - printBlock("Regex") { printer in - let start = printer.result.utf16.count - printer.printAsPattern(convertedFromAST: node, isTopLevel: true) - printer.locationMappings.append( - ((start, printer.result.utf16.count), currentASTNode.location) - ) - } - } - - // FIXME: Use of back-offs like height and depth - // imply that this DSLTree node has a corresponding - // AST. That's not always true, and it would be nice - // to have a non-backing-off pretty-printer that this - // can defer to. - private mutating func printAsPattern( - convertedFromAST node: DSLTree.Node, isTopLevel: Bool = false - ) { - if patternBackoff(DSLTree._Tree(node)) { - printBackoff(node) - return - } - - if let astNode = node.astNode { - currentASTNode = astNode - } - - switch node { - - case let .orderedChoice(a): - let currentASTNode = self.currentASTNode! - printBlock("ChoiceOf") { printer in - a.forEach { - let start = printer.result.utf16.count - printer.printAsPattern(convertedFromAST: $0) - printer.locationMappings.append( - ((start, printer.result.utf16.count), currentASTNode.location) - ) - } - } - - case let .concatenation(c): - printConcatenationAsPattern(c, isTopLevel: isTopLevel) - - case let .nonCapturingGroup(kind, child): - switch kind.ast { - case .atomicNonCapturing: - let currentASTNode = self.currentASTNode! - printBlock("Local") { printer in - let start = printer.result.utf16.count - printer.printAsPattern(convertedFromAST: child) - printer.locationMappings.append( - ((start, printer.result.utf16.count), currentASTNode.location) - ) - } - - case .lookahead: - let currentASTNode = self.currentASTNode! - printBlock("Lookahead") { printer in - let start = printer.result.utf16.count - printer.printAsPattern(convertedFromAST: child) - printer.locationMappings.append( - ((start, printer.result.utf16.count), currentASTNode.location) - ) - } - - case .negativeLookahead: - let currentASTNode = self.currentASTNode! - printBlock("NegativeLookahead") { printer in - let start = printer.result.utf16.count - printer.printAsPattern(convertedFromAST: child) - printer.locationMappings.append( - ((start, printer.result.utf16.count), currentASTNode.location) - ) - } - - default: - let start = result.utf16.count - printAsPattern(convertedFromAST: child) - locationMappings.append( - ((start, result.utf16.count), currentASTNode.location) - ) - } - - case let .capture(name, _, child, _): - var cap = "Capture" - if let n = name { - cap += "(as: \(n))" - } - let currentASTNode = self.currentASTNode! - printBlock(cap) { printer in - let start = printer.result.utf16.count - printer.printAsPattern(convertedFromAST: child) - printer.locationMappings.append( - ((start, printer.result.utf16.count), currentASTNode.location) - ) - } - - case let .ignoreCapturesInTypedOutput(child): - let start = result.utf16.count - printAsPattern(convertedFromAST: child, isTopLevel: isTopLevel) - locationMappings.append( - ((start, result.utf16.count), currentASTNode.location) - ) - - case .conditional: - let start = result.utf16.count - print("/* TODO: conditional */") - locationMappings.append( - ((start, result.utf16.count), currentASTNode.location) - ) - - case let .quantification(amount, kind, child): - let amountStr = amount.ast._patternBase - var kind = kind.ast?._patternBase ?? "" - - // If we've updated our quantification behavior, then use that. This - // occurs in scenarios where we use things like '(?U)' to indicate that - // we want reluctant default quantification behavior. - if quantificationBehavior != .eager { - kind = quantificationBehavior._patternBase - } - - var blockName = "\(amountStr)(\(kind))" - - if kind == ".eager" { - blockName = "\(amountStr)" - } - - // Special case single child character classes for repetition nodes. - // This lets us do something like the following: - // - // OneOrMore(.digit) - // vs - // OneOrMore { - // One(.digit) - // } - // - func printAtom(_ pattern: String) { - indent() - - if kind != ".eager" { - blockName.removeLast() - output("\(blockName), ") - } else { - output("\(blockName)(") - } - - output("\(pattern))") - terminateLine() - } - - func printSimpleCCC( - _ ccc: DSLTree.CustomCharacterClass - ) { - indent() - - if kind != ".eager" { - blockName.removeLast() - output("\(blockName), ") - } else { - output("\(blockName)(") - } - - printAsPattern(ccc, wrap: false, terminateLine: false) - output(")") - terminateLine() - } - - // We can only do this for Optionally, ZeroOrMore, and OneOrMore. Cannot - // do it right now for Repeat. - if amount.ast.supportsInlineComponent { - switch child { - case let .atom(a): - if let pattern = a._patternBase(&self), pattern.canBeWrapped { - printAtom(pattern.0) - return - } - - break - case let .customCharacterClass(ccc): - if ccc.isSimplePrint { - printSimpleCCC(ccc) - return - } - - break - - case let .convertedRegexLiteral(.atom(a), _): - if let pattern = a._patternBase(&self), pattern.canBeWrapped { - printAtom(pattern.0) - return - } - - break - case let .convertedRegexLiteral(.customCharacterClass(ccc), _): - if ccc.isSimplePrint { - printSimpleCCC(ccc) - return - } - - break - default: - break - } - } - - printBlock(blockName) { printer in - printer.printAsPattern(convertedFromAST: child) - } - - case let .atom(a): - if case .unconverted(let a) = a, a.ast.isUnprintableAtom { - print("#/\(a.ast._regexBase)/#") - return - } - - if let pattern = a._patternBase(&self) { - if pattern.canBeWrapped { - print("One(\(pattern.0))") - } else { - print(pattern.0) - } - } - - case .trivia: - // We never print trivia - break - - case .empty: - print("") - - case let .quotedLiteral(v): - print(v._quoted) - - case let .convertedRegexLiteral(n, _): - // FIXME: This recursion coordinates with back-off - // check above, so it should work out. Need a - // cleaner way to do this. This means the argument - // label is a lie. - printAsPattern(convertedFromAST: n, isTopLevel: isTopLevel) - - case let .customCharacterClass(ccc): - printAsPattern(ccc) - - case .consumer: - print("/* TODO: consumers */") - case .matcher: - print("/* TODO: consumer validators */") - case .characterPredicate: - print("/* TODO: character predicates */") - - case .absentFunction: - print("/* TODO: absent function */") - } - } - - enum NodeToPrint { - case dslNode(DSLTree.Node) - case stringLiteral(String) - } - - mutating func printAsPattern(_ node: NodeToPrint) { - switch node { - case .dslNode(let n): - printAsPattern(convertedFromAST: n) - case .stringLiteral(let str): - print(str) - } - } - - mutating func printConcatenationAsPattern( - _ nodes: [DSLTree.Node], isTopLevel: Bool - ) { - // We need to coalesce any adjacent character and scalar elements into a - // string literal, preserving scalar syntax. - let nodes = nodes - .map { NodeToPrint.dslNode($0.lookingThroughConvertedLiteral) } - .coalescing( - with: StringLiteralBuilder(), into: { .stringLiteral($0.result) } - ) { literal, node in - guard case .dslNode(let node) = node else { return false } - switch node { - case let .atom(.char(c)): - literal.append(c) - return true - case let .atom(.scalar(s)): - literal.append(unescaped: s._dslBase) - return true - case .quotedLiteral(let q): - literal.append(q) - return true - case .trivia: - // Trivia can be completely ignored if we've already coalesced - // something. - return !literal.isEmpty - default: - return false - } - } - if isTopLevel || nodes.count == 1 { - // If we're at the top level, or we coalesced everything into a single - // element, we don't need to print a surrounding Regex { ... }. - for n in nodes { - printAsPattern(n) - } - return - } - printBlock("Regex") { printer in - for n in nodes { - printer.printAsPattern(n) - } - } - } - - mutating func printAsPattern( - _ ccc: DSLTree.CustomCharacterClass, - wrap: Bool = true, - terminateLine: Bool = true - ) { - if ccc.hasUnprintableProperty { - printAsRegex(ccc, terminateLine: terminateLine) - return - } - - defer { - if ccc.isInverted { - printIndented { printer in - printer.indent() - printer.output(".inverted") - - if terminateLine { - printer.terminateLine() - } - } - } - } - - // If we only have 1 member, then we can emit it without the extra - // CharacterClass initialization - if ccc.members.count == 1 { - printAsPattern(ccc.members[0], wrap: wrap) - - if terminateLine { - self.terminateLine() - } - - return - } - - var charMembers = StringLiteralBuilder() - - // This iterates through all of the character class members collecting all - // of the members who can be stuffed into a singular '.anyOf(...)' vs. - // having multiple. This does alter the original representation, but the - // result is the same. For example: - // - // Convert: '[abc\d\Qxyz\E[:space:]def]' - // - // CharacterClass( - // .anyOf("abcxyzdef"), - // .digit, - // .whitespace - // ) - // - // This also allows us to determine if after collecting all of the members - // and stuffing them if we can just emit a standalone '.anyOf' instead of - // initializing a 'CharacterClass'. - let nonCharMembers = ccc.members.filter { - switch $0 { - case let .atom(a): - switch a { - case let .char(c): - charMembers.append(c) - return false - case let .scalar(s): - charMembers.append(unescaped: s._dslBase) - return false - case .unconverted(_): - return true - default: - return true - } - - case let .quotedLiteral(s): - charMembers.append(s) - return false - - case .trivia(_): - return false - - default: - return true - } - } - - // Also in the same vein, if we have a few atom members but no - // nonAtomMembers, then we can emit a single .anyOf(...) for them. - if !charMembers.isEmpty, nonCharMembers.isEmpty { - let anyOf = ".anyOf(\(charMembers))" - - indent() - - if wrap { - output("One(\(anyOf))") - } else { - output(anyOf) - } - - if terminateLine { - self.terminateLine() - } - - return - } - - // Otherwise, use the CharacterClass initialization with multiple members. - print("CharacterClass(") - printIndented { printer in - printer.indent() - - if !charMembers.isEmpty { - printer.output(".anyOf(\(charMembers))") - - if nonCharMembers.count > 0 { - printer.output(",") - } - - printer.terminateLine() - } - - for (i, member) in nonCharMembers.enumerated() { - printer.printAsPattern(member, wrap: false) - - if i != nonCharMembers.count - 1 { - printer.output(",") - } - - printer.terminateLine() - } - } - - indent() - output(")") - - if terminateLine { - self.terminateLine() - } - } - - // TODO: Some way to integrate this with conversion... - mutating func printAsPattern( - _ member: DSLTree.CustomCharacterClass.Member, - wrap: Bool = true - ) { - switch member { - case let .custom(ccc): - printAsPattern(ccc, terminateLine: false) - - case let .range(lhs, rhs): - if let lhs = lhs._patternBase(&self), let rhs = rhs._patternBase(&self) { - indent() - output("(") - output(lhs.0) - output("...") - output(rhs.0) - output(")") - } - - case let .atom(a): - indent() - switch a { - case let .char(c): - - if wrap { - output("One(.anyOf(\(String(c)._quoted)))") - } else { - output(".anyOf(\(String(c)._quoted))") - } - - case let .scalar(s): - - if wrap { - output("One(.anyOf(\(s._dslBase._bareQuoted)))") - } else { - output(".anyOf(\(s._dslBase._bareQuoted))") - } - - case let .unconverted(a): - let base = a.ast._patternBase - - if base.canBeWrapped, wrap { - output("One(\(base.0))") - } else { - output(base.0) - } - - case let .characterClass(cc): - if wrap { - output("One(\(cc._patternBase))") - } else { - output(cc._patternBase) - } - - default: - print(" // TODO: Atom \(a)") - } - - case .quotedLiteral(let s): - - if wrap { - output("One(.anyOf(\(s._quoted)))") - } else { - output(".anyOf(\(s._quoted))") - } - - case .trivia(_): - // We never print trivia - break - - case .intersection(let first, let second): - if wrap, first.isSimplePrint { - indent() - output("One(") - } - - printAsPattern(first, wrap: false) - printIndented { printer in - printer.indent() - printer.output(".intersection(") - printer.printAsPattern(second, wrap: false, terminateLine: false) - printer.output(")") - } - - if wrap, first.isSimplePrint { - output(")") - } - - case .subtraction(let first, let second): - if wrap, first.isSimplePrint { - indent() - output("One(") - } - - printAsPattern(first, wrap: false) - printIndented { printer in - printer.indent() - printer.output(".subtracting(") - printer.printAsPattern(second, wrap: false, terminateLine: false) - printer.output(")") - } - - if wrap, first.isSimplePrint { - output(")") - } - - case .symmetricDifference(let first, let second): - if wrap, first.isSimplePrint { - indent() - output("One(") - } - - printAsPattern(first, wrap: false) - printIndented { printer in - printer.indent() - printer.output(".symmetricDifference(") - printer.printAsPattern(second, wrap: false, terminateLine: false) - printer.output(")") - } - - if wrap, first.isSimplePrint { - output(")") - } - } - } - - mutating func printAsRegex( - _ ccc: DSLTree.CustomCharacterClass, - asFullRegex: Bool = true, - terminateLine: Bool = true - ) { - indent() - - if asFullRegex { - output("#/") - } - - output("[") - - if ccc.isInverted { - output("^") - } - - for member in ccc.members { - printAsRegex(member) - } - - output("]") - - if asFullRegex { - if terminateLine { - print("/#") - } else { - output("/#") - } - } - } - - mutating func printAsRegex(_ member: DSLTree.CustomCharacterClass.Member) { - switch member { - case let .custom(ccc): - printAsRegex(ccc, terminateLine: false) - - case let .range(lhs, rhs): - output(lhs._regexBase) - output("-") - output(rhs._regexBase) - - case let .atom(a): - switch a { - case let .char(c): - output(String(c)) - case let .unconverted(a): - output(a.ast._regexBase) - default: - print(" // TODO: Atom \(a)") - } - - case .quotedLiteral(let s): - output("\\Q\(s)\\E") - - case .trivia(_): - // We never print trivia - break - - case .intersection(let first, let second): - printAsRegex(first, asFullRegex: false, terminateLine: false) - output("&&") - printAsRegex(second, asFullRegex: false, terminateLine: false) - - case .subtraction(let first, let second): - printAsRegex(first, asFullRegex: false, terminateLine: false) - output("--") - printAsRegex(second, asFullRegex: false, terminateLine: false) - - case .symmetricDifference(let first, let second): - printAsRegex(first, asFullRegex: false, terminateLine: false) - output("~~") - printAsRegex(second, asFullRegex: false, terminateLine: false) - } - } -} - -extension String { - fileprivate var _escaped: String { - _replacing(#"\"#, with: #"\\"#)._replacing(#"""#, with: #"\""#) - } - - fileprivate var _quoted: String { - _escaped._bareQuoted - } - - fileprivate var _bareQuoted: String { - #""\#(self)""# - } -} - -extension UnicodeScalar { - var _dslBase: String { "\\u{\(String(value, radix: 16, uppercase: true))}" } -} - -/// A helper for building string literals, which handles escaping the contents -/// appended. -fileprivate struct StringLiteralBuilder { - private var contents = "" - - var result: String { contents._bareQuoted } - var isEmpty: Bool { contents.isEmpty } - - mutating func append(_ str: String) { - contents += str._escaped - } - mutating func append(_ c: Character) { - contents += String(c)._escaped - } - mutating func append(unescaped str: String) { - contents += str - } -} -extension StringLiteralBuilder: CustomStringConvertible { - var description: String { result } -} - -extension DSLTree.Atom.Assertion { - // TODO: Some way to integrate this with conversion... - var _patternBase: String { - switch self { - case .startOfLine: - return "Anchor.startOfLine" - case .endOfLine: - return "Anchor.endOfLine" - case .caretAnchor: - // The DSL doesn't have an equivalent to this, so print as regex. - return "/^/" - case .dollarAnchor: - // The DSL doesn't have an equivalent to this, so print as regex. - return "/$/" - case .wordBoundary: - return "Anchor.wordBoundary" - case .notWordBoundary: - return "Anchor.wordBoundary.inverted" - case .startOfSubject: - return "Anchor.startOfSubject" - case .endOfSubject: - return "Anchor.endOfSubject" - case .endOfSubjectBeforeNewline: - return "Anchor.endOfSubjectBeforeNewline" - case .textSegment: - return "Anchor.textSegmentBoundary" - case .notTextSegment: - return "Anchor.textSegmentBoundary.inverted" - case .firstMatchingPositionInSubject: - return "Anchor.firstMatchingPositionInSubject" - - case .resetStartOfMatch: - return "TODO: Assertion resetStartOfMatch" - } - } -} - -extension DSLTree.Atom.CharacterClass { - var _patternBase: String { - switch self { - case .anyGrapheme: - return ".anyGraphemeCluster" - case .digit: - return ".digit" - case .notDigit: - return ".digit.inverted" - case .word: - return ".word" - case .notWord: - return ".word.inverted" - case .horizontalWhitespace: - return ".horizontalWhitespace" - case .notHorizontalWhitespace: - return ".horizontalWhitespace.inverted" - case .newlineSequence: - return ".newlineSequence" - case .notNewline: - return ".newlineSequence.inverted" - case .verticalWhitespace: - return ".verticalWhitespace" - case .notVerticalWhitespace: - return ".verticalWhitespace.inverted" - case .whitespace: - return ".whitespace" - case .notWhitespace: - return ".whitespace.inverted" - case .anyUnicodeScalar: - fatalError("Unsupported") - } - } -} - -extension AST.Atom.CharacterProperty { - var isUnprintableProperty: Bool { - switch kind { - case .ascii: - return true - case .binary(let b, value: _): - return isUnprintableBinary(b) - case .generalCategory(let gc): - return isUnprintableGeneralCategory(gc) - case .posix(let p): - return isUnprintablePOSIX(p) - case .script(_), .scriptExtension(_): - return true - default: - return false - } - } - - func isUnprintableBinary(_ binary: Unicode.BinaryProperty) -> Bool { - // List out the ones we can print because that list is smaller. - switch binary { - case .whitespace: - return false - default: - return true - } - } - - func isUnprintableGeneralCategory( - _ gc: Unicode.ExtendedGeneralCategory - ) -> Bool { - // List out the ones we can print because that list is smaller. - switch gc { - case .decimalNumber: - return false - default: - return true - } - } - - func isUnprintablePOSIX(_ posix: Unicode.POSIXProperty) -> Bool { - // List out the ones we can print because that list is smaller. - switch posix { - case .xdigit: - return false - case .word: - return false - default: - return true - } - } -} - -extension AST.Atom.CharacterProperty { - // TODO: Some way to integrate this with conversion... - var _patternBase: String { - if isUnprintableProperty { - return _regexBase - } - - return _dslBase - } - - var _dslBase: String { - switch kind { - case .binary(let bp, _): - switch bp { - case .whitespace: - return ".whitespace" - default: - return "" - } - - case .generalCategory(let gc): - switch gc { - case .decimalNumber: - return ".digit" - default: - return "" - } - - case .posix(let p): - switch p { - case .xdigit: - return ".hexDigit" - case .word: - return ".word" - default: - return "" - } - - default: - return "" - } - } - - var _regexBase: String { - switch kind { - case .ascii: - return "[:\(isInverted ? "^" : "")ascii:]" - - case .binary(let b, value: _): - if isInverted { - return "[^\\p{\(b.rawValue)}]" - } else { - return "\\p{\(b.rawValue)}" - } - - case .generalCategory(let gc): - if isInverted { - return "[^\\p{\(gc.rawValue)}]" - } else { - return "\\p{\(gc.rawValue)}" - } - - case .posix(let p): - return "[:\(isInverted ? "^" : "")\(p.rawValue):]" - - case .script(let s): - return "[:\(isInverted ? "^" : "")script=\(s.rawValue):]" - - case .scriptExtension(let s): - return "[:\(isInverted ? "^" : "")scx=\(s.rawValue):]" - - default: - return " // TODO: Property \(self)" - } - } -} - -extension AST.Atom { - var isUnprintableAtom: Bool { - switch kind { - case .keyboardControl, .keyboardMeta, .keyboardMetaControl: - return true - case .namedCharacter(_): - return true - case .property(let p): - return p.isUnprintableProperty - default: - return false - } - } -} - -extension AST.Atom { - /// Base string to use when rendering as a component in a - /// pattern. Note that when the atom is rendered individually, - /// it still may need to be wrapped in quotes. - /// - /// TODO: We want to coalesce adjacent atoms, likely in - /// caller, but we might want to be parameterized at that point. - /// - /// TODO: Some way to integrate this with conversion... - var _patternBase: (String, canBeWrapped: Bool) { - if let anchor = self.dslAssertionKind { - return (anchor._patternBase, false) - } - - if isUnprintableAtom { - return (_regexBase, false) - } - - return _dslBase - } - - var _dslBase: (String, canBeWrapped: Bool) { - switch kind { - case let .char(c): - return (String(c), false) - - case let .scalar(s): - return (s.value._dslBase, false) - - case let .scalarSequence(seq): - return (seq.scalarValues.map(\._dslBase).joined(), false) - - case let .property(p): - return (p._dslBase, true) - - case let .escaped(e): - switch e { - // Anchors - case .wordBoundary: - return ("Anchor.wordBoundary", false) - case .notWordBoundary: - return ("Anchor.wordBoundary.inverted", false) - case .startOfSubject: - return ("Anchor.startOfSubject", false) - case .endOfSubject: - return ("Anchor.endOfSubject", false) - case .endOfSubjectBeforeNewline: - return ("Anchor.endOfSubjectBeforeNewline", false) - case .firstMatchingPositionInSubject: - return ("Anchor.firstMatchingPositionInSubject", false) - case .textSegment: - return ("Anchor.textSegmentBoundary", false) - case .notTextSegment: - return ("Anchor.textSegmentBoundary.inverted", false) - - // Character Classes - case .decimalDigit: - return (".digit", true) - case .notDecimalDigit: - return (".digit.inverted", true) - case .horizontalWhitespace: - return (".horizontalWhitespace", true) - case .notHorizontalWhitespace: - return (".horizontalWhitespace.inverted", true) - case .whitespace: - return (".whitespace", true) - case .notWhitespace: - return (".whitespace.inverted", true) - case .wordCharacter: - return (".word", true) - case .notWordCharacter: - return (".word.inverted", true) - case .graphemeCluster: - return (".anyGraphemeCluster", true) - case .newlineSequence: - return (".newlineSequence", true) - case .notNewline: - return (".newlineSequence.inverted", true) - case .verticalTab: - return (".verticalWhitespace", true) - case .notVerticalTab: - return (".verticalWhitespace.inverted", true) - - // Literal single characters all get converted into DSLTree.Atom.scalar - - default: - return ("TODO: escaped \(e)", false) - } - - case .namedCharacter: - return (" /* TODO: named character */", false) - - case .dot: - // The DSL does not have an equivalent to '.', print as a regex. - return ("/./", false) - - case .caretAnchor, .dollarAnchor: - fatalError("unreachable") - - case .backreference: - return (" /* TODO: back reference */", false) - - case .subpattern: - return (" /* TODO: subpattern */", false) - - case .callout: - return (" /* TODO: callout */", false) - - case .backtrackingDirective: - return (" /* TODO: backtracking directive */", false) - - case .changeMatchingOptions: - return ("/* TODO: change matching options */", false) - - // Every other case we've already decided cannot be represented inside the - // DSL. - default: - return ("", false) - } - } - - var _regexBase: String { - switch kind { - case .char, .scalar, .scalarSequence: - return literalStringValue! - - case .invalid: - // TODO: Can we recover the original regex text from the source range? - return "<#value#>" - - case let .property(p): - return p._regexBase - - case let .escaped(e): - return "\\\(e.character)" - - case .keyboardControl(let k): - return "\\c\(k)" - - case .keyboardMeta(let k): - return "\\M-\(k)" - - case .keyboardMetaControl(let k): - return "\\M-\\C-\(k)" - - case .namedCharacter(let n): - return "\\N{\(n)}" - - case .dot: - return "." - - case .caretAnchor, .dollarAnchor: - fatalError("unreachable") - - case .backreference: - return " /* TODO: back reference */" - - case .subpattern: - return " /* TODO: subpattern */" - - case .callout: - return " /* TODO: callout */" - - case .backtrackingDirective: - return " /* TODO: backtracking directive */" - - case .changeMatchingOptions: - return "/* TODO: change matching options */" - } - } -} - -extension AST.Atom.Number { - var _patternBase: String { - value.map { "\($0)" } ?? "<#number#>" - } -} - -extension AST.Quantification.Amount { - var _patternBase: String { - switch self { - case .zeroOrMore: return "ZeroOrMore" - case .oneOrMore: return "OneOrMore" - case .zeroOrOne: return "Optionally" - case let .exactly(n): return "Repeat(count: \(n._patternBase))" - case let .nOrMore(n): return "Repeat(\(n._patternBase)...)" - case let .upToN(n): return "Repeat(...\(n._patternBase))" - case let .range(n, m): return "Repeat(\(n._patternBase)...\(m._patternBase))" - } - } - - var supportsInlineComponent: Bool { - switch self { - case .zeroOrMore: return true - case .oneOrMore: return true - case .zeroOrOne: return true - default: return false - } - } -} - -extension AST.Quantification.Kind { - var _patternBase: String { - switch self { - case .eager: return ".eager" - case .reluctant: return ".reluctant" - case .possessive: return ".possessive" - } - } -} - -extension DSLTree.QuantificationKind { - var _patternBase: String { - (ast ?? .eager)._patternBase - } -} - -extension DSLTree.CustomCharacterClass.Member { - var isUnprintableMember: Bool { - switch self { - case .atom(.unconverted(let a)): - return a.ast.isUnprintableAtom - case .custom(let c): - return c.hasUnprintableProperty - case .range(.unconverted(let lhs), .unconverted(let rhs)): - return lhs.ast.isUnprintableAtom || rhs.ast.isQuantifiable - case .intersection(let first, let second): - return first.hasUnprintableProperty || second.hasUnprintableProperty - case .subtraction(let first, let second): - return first.hasUnprintableProperty || second.hasUnprintableProperty - case .symmetricDifference(let first, let second): - return first.hasUnprintableProperty || second.hasUnprintableProperty - default: - return false - } - } -} - -extension DSLTree.CustomCharacterClass { - var hasUnprintableProperty: Bool { - members.contains { - $0.isUnprintableMember - } - } - - var isSimplePrint: Bool { - if members.count == 1 { - switch members[0] { - case .intersection(_, _): - return false - case .subtraction(_, _): - return false - case .symmetricDifference(_, _): - return false - default: - return true - } - } - - let nonCharMembers = members.filter { - switch $0 { - case let .atom(a): - switch a { - case .char(_): - return false - case .scalar(_): - return false - case .unconverted(_): - return true - default: - return true - } - - case .quotedLiteral(_): - return false - - case .trivia(_): - return false - - default: - return true - } - } - - if nonCharMembers.isEmpty { - return true - } - - return false - } -} - -extension DSLTree.Atom { - func _patternBase( - _ printer: inout PrettyPrinter - ) -> (String, canBeWrapped: Bool)? { - switch self { - case .any: - return (".any", true) - - case .anyNonNewline: - return (".anyNonNewline", true) - - case .dot: - // The DSL does not have an equivalent to '.', print as a regex. - return ("/./", false) - - case let .char(c): - return (String(c)._quoted, false) - - case let .scalar(s): - let hex = String(s.value, radix: 16, uppercase: true) - return ("\\u{\(hex)}"._bareQuoted, false) - - case let .unconverted(a): - if a.ast.isUnprintableAtom { - return ("#/\(a.ast._regexBase)/#", false) - } else { - return a.ast._dslBase - } - - case .assertion(let a): - return (a._patternBase, false) - case .characterClass(let cc): - return (cc._patternBase, true) - - case .backreference(_): - return ("/* TODO: backreferences */", false) - - case .symbolicReference: - return ("/* TODO: symbolic references */", false) - - case .changeMatchingOptions(let matchingOptions): - for add in matchingOptions.ast.adding { - switch add.kind { - case .reluctantByDefault: - printer.quantificationBehavior = .reluctant - default: - break - } - } - } - - return nil - } - - var _regexBase: String { - switch self { - case .any: - return "(?s:.)" - - case .anyNonNewline: - return "(?-s:.)" - - case .dot: - return "." - - case let .char(c): - return String(c) - - case let .scalar(s): - let hex = String(s.value, radix: 16, uppercase: true) - return "\\u{\(hex)}"._bareQuoted - - case let .unconverted(a): - return a.ast._regexBase - - case .assertion: - return "/* TODO: assertions */" - case .characterClass: - return "/* TODO: character classes */" - case .backreference: - return "/* TODO: backreferences */" - case .symbolicReference: - return "/* TODO: symbolic references */" - case .changeMatchingOptions(let matchingOptions): - var result = "" - - for add in matchingOptions.ast.adding { - switch add.kind { - case .reluctantByDefault: - result += "(?U)" - default: - break - } - } - - return result - } - } -} - -extension DSLTree.Node { - func getNamedCaptures() -> [String] { - var result: [String] = [] - - switch self { - case .capture(let name?, _, _, _): - result.append(name) - - case .concatenation(let nodes): - for node in nodes { - result += node.getNamedCaptures() - } - - case .convertedRegexLiteral(let node, _): - result += node.getNamedCaptures() - - case .quantification(_, _, let node): - result += node.getNamedCaptures() - - default: - break - } - - return result - } -} diff --git a/Sources/PatternConverter/Main.swift b/Sources/PatternConverter/Main.swift deleted file mode 100644 index 79372a4..0000000 --- a/Sources/PatternConverter/Main.swift +++ /dev/null @@ -1,60 +0,0 @@ -import Foundation -import RegexBuilder -@testable @_spi(RegexBuilder) import _StringProcessing - -@main -struct Main { - static func main() throws { - do { -// let builderDSL = CommandLine.arguments[1] - - let regex = Regex { - Optionally { - Capture { - Regex { - Capture { - Repeat(count: 3) { - One(.digit) - } - } - ChoiceOf { - "." - "-" - } - } - } - } - Capture { - Repeat(count: 3) { - One(.digit) - } - } - ChoiceOf { - "." - "-" - } - Capture { - Repeat(count: 4) { - One(.digit) - } - } - } - let converter = PatternConverter() - let pattern = try converter.convert(regex.root) - - let data = try JSONEncoder().encode(pattern) - print(String(data: data, encoding: .utf8) ?? "") - } catch { - print("\(error)", to:&standardError) - } - } -} - -var standardError = FileHandle.standardError - -extension FileHandle : TextOutputStream { - public func write(_ string: String) { - guard let data = string.data(using: .utf8) else { return } - self.write(data) - } -} diff --git a/Sources/PatternConverter/PatternConverter.swift b/Sources/PatternConverter/PatternConverter.swift deleted file mode 100644 index ac21f95..0000000 --- a/Sources/PatternConverter/PatternConverter.swift +++ /dev/null @@ -1,69 +0,0 @@ -import Foundation -import RegexBuilder -@testable import _RegexParser -@testable @_spi(RegexBuilder) import _StringProcessing - -struct PatternConverter { - func convert(_ root: DSLTree.Node) throws -> String { - emitRoot(root) - return "" - } - - func emitRoot(_ root: DSLTree.Node) { - for node in root.children { - emitNode(node) - } - } - - func emitNode(_ node: DSLTree.Node) { - switch node { - case .orderedChoice(let choice): - print("orderedChoice") - for node in choice { - emitNode(node) - } - case .concatenation(let concatenation): - print("concatenation") - for node in concatenation { - emitNode(node) - } - case .capture(name: let name, reference: let reference, let node, let transform): - print("capture") - emitNode(node) - case .nonCapturingGroup(let kind, let node): - print("nonCapturingGroup") - emitNode(node) - case .conditional(let kind, let thenNode, let elseNode): - print("conditional") - emitNode(thenNode) - emitNode(elseNode) - case .quantification(let amount, let kind, let node): - print("quantification") - - emitNode(node) - case .customCharacterClass(_): - print("customCharacterClass") - case .atom(_): - print("atom") - case .trivia(_): - print("trivia") - case .empty: - print("empty") - case .quotedLiteral(_): - print("quotedLiteral") - case .regexLiteral(_): - print("regexLiteral") - case .absentFunction(_): - print("absentFunction") - case .convertedRegexLiteral(let node, _): - print("convertedRegexLiteral") - emitNode(node) - case .consumer(_): - print("consumer") - case .matcher(_, _): - print("matcher") - case .characterPredicate(_): - print("characterPredicate") - } - } -} diff --git a/Tests/RegexTests/DSLParserTests.swift b/Tests/RegexTests/DSLParserTests.swift deleted file mode 100644 index f5fc209..0000000 --- a/Tests/RegexTests/DSLParserTests.swift +++ /dev/null @@ -1,48 +0,0 @@ -import Foundation -import XCTest -@testable import DSLParser - -class DSLParserTests: XCTestCase { - func testParseDSL() throws { - do { - let parser = DSLParser() - let tokens = try parser.parse(#"\d+a|b"#) - print(tokens) - } - do { - let parser = DSLParser() - let tokens = try parser.parse(#"gray|grey"#) - print(tokens) - } - do { - let parser = DSLParser() - let tokens = try parser.parse(#"a(?)\d+"#) - print(tokens) - } - do { - let parser = DSLParser() - let tokens = try parser.parse(#"hello"#) - print(tokens) - } - do { - let parser = DSLParser() - let tokens = try parser.parse(#"gray|grey"#) - print(tokens) - } - do { - let parser = DSLParser() - let tokens = try parser.parse(#"\d+"#) - print(tokens) - } - do { - let parser = DSLParser() - let tokens = try parser.parse(#"\d+a"#) - print(tokens) - } - do { - let parser = DSLParser() - let tokens = try parser.parse(#"\b(?:[a-eg-z]|f(?!oo))\w*\b"#) - print(tokens) - } - } -} diff --git a/Tests/RegexTests/DebuggerTests.swift b/Tests/RegexTests/DebuggerTests.swift new file mode 100644 index 0000000..17f01f0 --- /dev/null +++ b/Tests/RegexTests/DebuggerTests.swift @@ -0,0 +1,140 @@ +import Foundation +import XCTest +@testable import Debugger + +class DebuggerTests: XCTestCase { + func testDebugPattern01() throws { + let pattern = #"a(b|c)"# + let text = "ac" + + try run(pattern: pattern, text: text, matchingOptions: []) + let stepCount = Context.shared.stepCount + + let context = Context.shared + for i in 1...stepCount { + try run(pattern: pattern, text: text, matchingOptions: [], until: i) + print("step: \(context.stepCount), start: \(context.start), current: \(context.current), \(text[text.index(text.startIndex, offsetBy: context.start)..>>>>>> branch-a + """ + + try run(pattern: pattern, text: text, matchingOptions: []) + let stepCount = Context.shared.stepCount + print("stepCount: \(stepCount)") + + let context = Context.shared + for i in 1...stepCount { + try run(pattern: pattern, text: text, matchingOptions: [], until: i) + print("step: \(context.stepCount), start: \(context.start), current: \(context.current), \(text[text.index(text.startIndex, offsetBy: context.start)..