Skip to content

Commit 6236e84

Browse files
authored
Fix AttributedString slicing behavior on range expressions (#727)
`AttributedString` goes out of its way to emulate the stdlib’s behavior for `String` slicing. Unfortunately, it went a little overboard: ```swift import Foundation let str1 = "F\u{301}a\u{308}n\u{303}c\u{327}y\u{30a}" // "F́äñçẙ" let str2 = AttributedString(str1) let substr1 = str1[...str1.startIndex] // “F\u{301}” let substr2 = str2[...str2.startIndex] // “F” print(Array(substr1.unicodeScalars)) print(Array(substr2.unicodeScalars)) ``` We expect slicing on range expressions to always produce consistent results between `String` and `AttributedString`. When slicing character-based string views on `…str.startIndex`, we naturally expect the range expression to cover the entire first `Character`, and that is precisely what `String` does. `AttributedString`’s default view is not as obviously `Character`-based as `String` is, but emulating the stdlib’s behavior is much preferable than to allow such simple, character-aligned range expressions to slice the attributed string at random sub-character positions.
1 parent ab2cd9c commit 6236e84

File tree

2 files changed

+70
-6
lines changed

2 files changed

+70
-6
lines changed

Sources/FoundationEssentials/AttributedString/AttributedString.swift

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,9 @@ extension AttributedString {
292292

293293
public mutating func replaceSubrange(_ range: some RangeExpression<Index>, with s: some AttributedStringProtocol) {
294294
ensureUniqueReference()
295-
// Note: we allow sub-Character ranges, so we must use `unicodeScalars` here, not `characters`.
296-
let subrange = range.relative(to: unicodeScalars)._bstringRange
295+
// Note: slicing generally allows sub-Character ranges, but we need to resolve range
296+
// expressions using the characters view, to remain consistent with the stdlib.
297+
let subrange = range.relative(to: characters)._bstringRange
297298
_guts.replaceSubrange(subrange, with: s)
298299
}
299300
}
@@ -327,14 +328,16 @@ extension AttributedString {
327328
extension AttributedString {
328329
public subscript(bounds: some RangeExpression<Index>) -> AttributedSubstring {
329330
get {
330-
// Note: we allow sub-Character ranges, so we must use `unicodeScalars` here, not `characters`.
331-
let bounds = bounds.relative(to: unicodeScalars)
331+
// Note: slicing generally allows sub-Character ranges, but we need to resolve range
332+
// expressions using the characters view, to remain consistent with the stdlib.
333+
let bounds = bounds.relative(to: characters)
332334
return AttributedSubstring(_guts, in: bounds._bstringRange)
333335
}
334336
_modify {
335337
ensureUniqueReference()
336-
// Note: we allow sub-Character ranges, so we must use `unicodeScalars` here, not `characters`.
337-
let bounds = bounds.relative(to: unicodeScalars)
338+
// Note: slicing generally allows sub-Character ranges, but we need to resolve range
339+
// expressions using the characters view, to remain consistent with the stdlib.
340+
let bounds = bounds.relative(to: characters)
338341
var substr = AttributedSubstring(_guts, in: bounds._bstringRange)
339342
let ident = Self._nextModifyIdentity
340343
substr._identity = ident
@@ -348,6 +351,10 @@ extension AttributedString {
348351
yield &substr
349352
}
350353
set {
354+
// Note: slicing generally allows sub-Character ranges, but we need to resolve range
355+
// expressions using the characters view, to remain consistent with the stdlib.
356+
let bounds = bounds.relative(to: characters)
357+
351358
// FIXME: Why is this allowed if _modify traps on replacement?
352359
self.replaceSubrange(bounds, with: newValue)
353360
}

Tests/FoundationEssentialsTests/AttributedString/AttributedStringTests.swift

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,6 +1857,63 @@ E {
18571857
XCTAssertEqual(b.count, 0)
18581858
}
18591859

1860+
func testCharacterSlicing_RangeExpressions() {
1861+
// Make sure `AttributedString` and `String` produce consistent results when slicing,
1862+
// for every range expression, whether or not the bounds fall on `Character` boundaries.
1863+
//
1864+
// (SE-0180 mistakenly prevented `String` from rounding down indices to Character boundaries
1865+
// when slicing, and `AttributedString` has to emulate that choice. However,
1866+
// `AttributedSubstring` (intentionally, and unavoidably) has to round down the boundaries
1867+
// of its character view -- so we expect some differences when comparing `characters`.
1868+
// The substring's `unicodeScalars` view always gives us the precise original boundaries.)
1869+
1870+
let str = "F\u{301}a\u{308}n\u{303}c\u{327}y\u{30a}" // "F́äñçẙ"
1871+
let astr = AttributedString(str)
1872+
1873+
func check<T: Equatable>(
1874+
_ a: some Sequence<T>,
1875+
_ b: some Sequence<T>,
1876+
file: StaticString = #file, line: UInt = #line
1877+
) {
1878+
XCTAssertTrue(
1879+
a.elementsEqual(b),
1880+
"'\(Array(a))' does not equal '\(Array(b))'",
1881+
file: file, line: line)
1882+
}
1883+
1884+
check(str, astr.characters)
1885+
check(str.unicodeScalars, astr.unicodeScalars)
1886+
1887+
// Go through all valid range expressions within the two strings and compare slicing
1888+
// results.
1889+
var i1 = str.unicodeScalars.startIndex
1890+
var i2 = astr.unicodeScalars.startIndex
1891+
while true {
1892+
check(str[..<i1].unicodeScalars, astr[..<i2].unicodeScalars)
1893+
check(str[i1...].unicodeScalars, astr[i2...].unicodeScalars)
1894+
1895+
var j1 = i1
1896+
var j2 = i2
1897+
while true {
1898+
check(str[i1..<j1].unicodeScalars, astr[i2..<j2].unicodeScalars)
1899+
1900+
if j1 == str.endIndex { break }
1901+
1902+
check(str[i1...j1].unicodeScalars, astr[i2...j2].unicodeScalars)
1903+
1904+
str.unicodeScalars.formIndex(after: &j1)
1905+
j2 = astr.index(afterUnicodeScalar: j2)
1906+
}
1907+
1908+
if i1 == str.endIndex { break }
1909+
1910+
check(str[...i1].unicodeScalars, astr[...i2].unicodeScalars)
1911+
1912+
str.unicodeScalars.formIndex(after: &i1)
1913+
i2 = astr.index(afterUnicodeScalar: i2)
1914+
}
1915+
}
1916+
18601917
func testUnicodeScalarsSlicing() {
18611918
let attrStr = AttributedString("Cafe\u{301}", attributes: AttributeContainer().testInt(1))
18621919
let range = attrStr.startIndex ..< attrStr.endIndex

0 commit comments

Comments
 (0)