Skip to content

Commit 437a781

Browse files
committed
Merge branch 'main' into github-actions-support
2 parents 5904a34 + c7c539d commit 437a781

File tree

5 files changed

+74
-6
lines changed

5 files changed

+74
-6
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,9 @@ extension Parser {
331331
///
332332
/// Diagnoses on overflow
333333
///
334-
mutating func lexNumber(_ kind: RadixKind = .decimal) -> AST.Atom.Number? {
334+
mutating func lexNumber(
335+
_ kind: RadixKind = .decimal
336+
) -> AST.Atom.Number? {
335337
guard let str = tryEatPrefix(kind.characterFilter) else {
336338
return nil
337339
}
@@ -342,6 +344,26 @@ extension Parser {
342344
return .init(i, at: str.location)
343345
}
344346

347+
/// Attempts to lex a single quantification bound, i.e. one of the numbers
/// inside the braces of a pattern such as `/x{3,12}/`.
///
/// Only decimal bounds are recognised: the prefix is consumed with the
/// `.decimal` radix kind's character filter.
///
/// Bounds are currently limited to what fits in a `UInt16`; any literal that
/// cannot be converted is diagnosed as `.numberOverflow` at the literal's
/// location.
///
/// - Returns: `nil` if no number is present at the current position.
///   Otherwise a located number whose value is the parsed bound, or `nil`
///   when the overflow diagnostic was emitted.
mutating func lexQuantBound() -> AST.Atom.Number? {
  let radixKind: RadixKind = .decimal

  // Nothing matching the decimal filter here: this is not a bound at all.
  guard let digits = tryEatPrefix(radixKind.characterFilter) else {
    return nil
  }

  // Happy path: the literal fits in 16 bits, so widen it to `Int`.
  if let bound = UInt16(digits.value, radix: radixKind.radix) {
    return .init(Int(bound), at: digits.location)
  }

  // Conversion failed: report it, but still return a located (valueless)
  // number so the caller can keep parsing.
  error(.numberOverflow(digits.value), at: digits.location)
  return .init(nil, at: digits.location)
}
365+
366+
345367
/// Expect a number of a given `kind`, diagnosing if a number cannot be
346368
/// parsed.
347369
mutating func expectNumber(_ kind: RadixKind = .decimal) -> AST.Atom.Number {
@@ -492,7 +514,7 @@ extension Parser {
492514

493515
return p.tryEating { p in
494516
guard p.tryEat("{"),
495-
let range = p.lexRange(trivia: &trivia),
517+
let range = p.lexQuantRange(trivia: &trivia),
496518
p.tryEat("}")
497519
else { return nil }
498520
return range.value
@@ -519,12 +541,14 @@ extension Parser {
519541
/// | ExpRange
520542
/// ExpRange -> '..<' <Int> | '...' <Int>
521543
/// | <Int> '..<' <Int> | <Int> '...' <Int>?
522-
mutating func lexRange(trivia: inout [AST.Trivia]) -> Located<Quant.Amount>? {
544+
mutating func lexQuantRange(
545+
trivia: inout [AST.Trivia]
546+
) -> Located<Quant.Amount>? {
523547
recordLoc { p in
524548
p.tryEating { p in
525549
if let t = p.lexWhitespace() { trivia.append(t) }
526550

527-
let lowerOpt = p.lexNumber()
551+
let lowerOpt = p.lexQuantBound()
528552

529553
if let t = p.lexWhitespace() { trivia.append(t) }
530554

@@ -546,7 +570,7 @@ extension Parser {
546570

547571
if let t = p.lexWhitespace() { trivia.append(t) }
548572

549-
var upperOpt = p.lexNumber()
573+
var upperOpt = p.lexQuantBound()
550574
if closedRange == false {
551575
// If we have an open range, the upper bound should be adjusted down.
552576
upperOpt?.value? -= 1

Sources/_StringProcessing/Unicode/WordBreaking.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,10 @@ extension String {
8787
var j = maxIndex ?? range.lowerBound
8888

8989
while j < range.upperBound, j <= i {
90-
cache!.insert(j)
90+
// Workaround for underlying issue in https://github.com/swiftlang/swift-experimental-string-processing/issues/818
91+
let (inserted, _) = cache!.insert(j)
92+
guard inserted else { return true }
93+
9194
j = _wordIndex(after: j)
9295
}
9396

Tests/RegexBuilderTests/RegexDSLTests.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,6 +1946,22 @@ extension RegexDSLTests {
19461946
XCTAssertEqual(anyOutput[15].value as? Int, 123)
19471947
XCTAssertEqual(anyOutput[16].substring, "456")
19481948
}
1949+
1950+
/// Regression test for
/// https://github.com/swiftlang/swift-experimental-string-processing/issues/818
///
/// Searches text in which "example.com" is surrounded by U+2060 scalars and
/// curly quotes for a word-boundary-anchored pattern. The test only requires
/// that both searches run to completion; it does not pin whether a match is
/// found.
func testIssue818() throws {
  // Original report from https://github.com/swiftlang/swift-experimental-string-processing/issues/818
  let clip = "⁠‘⁠⁠example.com⁠⁠’"
  let clip2 = "\u{2060}\u{2018}\u{2060}\u{2060}example.com\u{2060}\u{2060}\u{2019}"

  // Sanity-check that the literal (which contains invisible scalars) and the
  // escaped spelling are the same text. Use an XCTest assertion rather than
  // `assert`, which is compiled out in optimized builds and traps the whole
  // test process instead of recording a failure.
  XCTAssertTrue(
    clip.unicodeScalars.elementsEqual(clip2.unicodeScalars),
    "literal and escaped spellings of the input must be scalar-for-scalar equal")

  let pattern = Regex {
    Anchor.wordBoundary // line A
    "example"
    Anchor.wordBoundary // line B
  }

  // `contains` returns a non-optional `Bool`, so wrapping it in
  // `XCTAssertNotNil` could never fail. What is being checked is simply that
  // these calls return (NOTE(review): presumably the original bug was
  // non-termination in word-boundary matching -- confirm against the issue),
  // so the results are discarded explicitly.
  _ = clip.contains(pattern)
  _ = clip2.contains(pattern)
}
19491965
}
19501966

19511967
extension Unicode.Scalar {

Tests/RegexTests/LexTests.swift

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,25 @@ extension RegexTests {
6363
_ = p.lexNumber()
6464
}
6565

66+
let invalidQuantBounds: Array<String> = [
67+
"65536", // UInt16.max + 1
68+
"2147483646", // Int32.max - 1
69+
"9223372036854775806", // Int64.max - 1
70+
]
71+
72+
for invalidNum in invalidQuantBounds {
73+
let regexes: Array<String> = [
74+
"x{\(invalidNum)}",
75+
"x{1,\(invalidNum)}",
76+
"x{\(invalidNum),1}",
77+
]
78+
for regex in regexes {
79+
diagnose(regex, expecting: .numberOverflow(invalidNum)) { p in
80+
_ = p.parse()
81+
}
82+
}
83+
}
84+
6685
// TODO: want to dummy print out source ranges, etc, test that.
6786
}
6887

Tests/RegexTests/MatchTests.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,12 @@ extension RegexTests {
751751
firstMatchTest("(?U)a??", input: "a", match: "a")
752752
firstMatchTest("(?U)a??a", input: "aaa", match: "aa")
753753

754+
// Quantification syntax is somewhat dependent on the contents.
755+
// In JS, PCRE2, Python, and some others, /x{-1}/ will be literally "x{-1}"
756+
// Note that Java8 and Rust throw an (unhelpful) error
757+
firstMatchTest("x{-1}", input: "x{-1}", match: "x{-1}")
758+
firstMatchTest("x{-1}", input: "xax{-2}bx{-1}c", match: "x{-1}")
759+
754760
// TODO: After captures, easier to test these
755761
}
756762

0 commit comments

Comments
 (0)