Skip to content

Commit 3ad9d2c

Browse files
natecook1000 authored and hamishknight committed
Implement atomic non-capturing group support
In addition to the (?>...) syntax, this is what's underneath `Local`.
1 parent 49d62af commit 3ad9d2c

File tree

5 files changed

+76
-9
lines changed

5 files changed

+76
-9
lines changed

Sources/_RegexParser/Regex/Parse/Sema.swift

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -325,7 +325,8 @@ extension RegexValidator {
325325
func validateGroup(_ group: AST.Group) throws {
326326
let kind = group.kind
327327
switch kind.value {
328-
case .capture, .namedCapture, .nonCapture, .lookahead, .negativeLookahead:
328+
case .capture, .namedCapture, .nonCapture, .lookahead, .negativeLookahead,
329+
.atomicNonCapturing:
329330
break
330331

331332
case .balancedCapture:
@@ -336,9 +337,6 @@ extension RegexValidator {
336337
// We need to figure out how these interact with typed captures.
337338
throw error(.unsupported("branch reset group"), at: kind.location)
338339

339-
case .atomicNonCapturing:
340-
throw error(.unsupported("atomic group"), at: kind.location)
341-
342340
case .nonAtomicLookahead:
343341
throw error(.unsupported("non-atomic lookahead"), at: kind.location)
344342

Sources/_StringProcessing/ByteCodeGen.swift

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -339,6 +339,38 @@ fileprivate extension Compiler.ByteCodeGen {
339339
builder.label(success)
340340
}
341341

342+
mutating func emitAtomicNoncapturingGroup(
343+
_ child: DSLTree.Node
344+
) throws {
345+
/*
346+
save(continuingAt: success)
347+
save(restoringAt: intercept)
348+
<sub-pattern> // failure restores at intercept
349+
clearThrough(intercept) // remove intercept and any leftovers from <sub-pattern>
350+
fail // ->success
351+
intercept:
352+
clearSavePoint // remove success
353+
fail // propagate failure
354+
success:
355+
...
356+
*/
357+
358+
let intercept = builder.makeAddress()
359+
let success = builder.makeAddress()
360+
361+
builder.buildSaveAddress(success)
362+
builder.buildSave(intercept)
363+
try emitNode(child)
364+
builder.buildClearThrough(intercept)
365+
builder.buildFail()
366+
367+
builder.label(intercept)
368+
builder.buildClear()
369+
builder.buildFail()
370+
371+
builder.label(success)
372+
}
373+
342374
mutating func emitMatcher(
343375
_ matcher: @escaping _MatcherInterface
344376
) -> ValueRegister {
@@ -384,6 +416,9 @@ fileprivate extension Compiler.ByteCodeGen {
384416
}
385417
options.apply(optionSequence)
386418
try emitNode(child)
419+
420+
case .atomicNonCapturing:
421+
try emitAtomicNoncapturingGroup(child)
387422

388423
default:
389424
// FIXME: Other kinds...

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -467,6 +467,29 @@ class RegexDSLTests: XCTestCase {
467467
XCTAssertEqual("ab12".firstMatch(of: octoDecimalRegex)!.output.1, 61904)
468468
}
469469

470+
func testLocal() throws {
471+
try _testDSLCaptures(
472+
("aaaaa", nil),
473+
matchType: Substring.self, ==)
474+
{
475+
Local {
476+
OneOrMore("a")
477+
}
478+
"a"
479+
}
480+
481+
try _testDSLCaptures(
482+
("aa", "aa"),
483+
("aaa", nil),
484+
matchType: Substring.self, ==)
485+
{
486+
Local {
487+
OneOrMore("a", .reluctant)
488+
}
489+
"a"
490+
}
491+
}
492+
470493
func testAssertions() throws {
471494
try _testDSLCaptures(
472495
("aaaaab", "aaaaab"),

Tests/RegexTests/MatchTests.swift

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1057,14 +1057,25 @@ extension RegexTests {
10571057
firstMatchTest(
10581058
#"(?:a|.b)c"#, input: "123abcacxyz", match: "abc")
10591059
firstMatchTest(
1060-
#"(?>a|.b)c"#, input: "123abcacxyz", match: "ac", xfail: true)
1060+
#"(?>a|.b)c"#, input: "123abcacxyz", match: "ac")
10611061
firstMatchTest(
1062-
"(*atomic:a|.b)c", input: "123abcacxyz", match: "ac", xfail: true)
1062+
"(*atomic:a|.b)c", input: "123abcacxyz", match: "ac")
10631063
firstMatchTest(
10641064
#"(?:a+)[a-z]c"#, input: "123aacacxyz", match: "aac")
10651065
firstMatchTest(
1066-
#"(?>a+)[a-z]c"#, input: "123aacacxyz", match: "ac", xfail: true)
1066+
#"(?>a+)[a-z]c"#, input: "123aacacxyz", match: nil)
1067+
1068+
// Atomicity should stay in the atomic group
1069+
firstMatchTest(
1070+
#"(?:(?>a)|.b)c"#, input: "123abcacxyz", match: "abc")
10671071

1072+
// Quantifier behavior inside atomic
1073+
firstMatchTest(
1074+
#"^(?>a+?)a$"#, input: "aa", match: "aa")
1075+
firstMatchTest(
1076+
#"^(?>a+?)a$"#, input: "aaa", match: nil)
1077+
firstMatchTest(
1078+
#"(?>a++)a"#, input: "aaa", match: nil)
10681079

10691080
// TODO: Test example where non-atomic is significant
10701081
firstMatchTest(

Tests/RegexTests/ParseTests.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -979,10 +979,10 @@ extension RegexTests {
979979
concat("a", nonCaptureReset("b"), "c"), throwsError: .unsupported)
980980
parseTest(
981981
#"a(?>b)c"#,
982-
concat("a", atomicNonCapturing("b"), "c"), throwsError: .unsupported)
982+
concat("a", atomicNonCapturing("b"), "c"))
983983
parseTest(
984984
"a(*atomic:b)c",
985-
concat("a", atomicNonCapturing("b"), "c"), throwsError: .unsupported)
985+
concat("a", atomicNonCapturing("b"), "c"))
986986

987987
parseTest("a(?=b)c", concat("a", lookahead("b"), "c"))
988988
parseTest("a(*pla:b)c", concat("a", lookahead("b"), "c"))

0 commit comments

Comments
 (0)