 //===----------------------------------------------------------------------===//
 
 internal func _allASCII(_ input: UnsafeBufferPointer<UInt8>) -> Bool {
-  if input.isEmpty { return true }
-
-  // NOTE: Avoiding for-in syntax to avoid bounds checks
-  //
-  // TODO(String performance): SIMD-ize
-  //
-  let count = input.count
-  var ptr = unsafe UnsafeRawPointer(input.baseAddress._unsafelyUnwrappedUnchecked)
-
-  let asciiMask64 = 0x8080_8080_8080_8080 as UInt64
-  let asciiMask32 = UInt32(truncatingIfNeeded: asciiMask64)
-  let asciiMask16 = UInt16(truncatingIfNeeded: asciiMask64)
-  let asciiMask8 = UInt8(truncatingIfNeeded: asciiMask64)
-
-  let end128 = unsafe ptr + count & ~(MemoryLayout<(UInt64, UInt64)>.stride &- 1)
-  let end64 = unsafe ptr + count & ~(MemoryLayout<UInt64>.stride &- 1)
-  let end32 = unsafe ptr + count & ~(MemoryLayout<UInt32>.stride &- 1)
-  let end16 = unsafe ptr + count & ~(MemoryLayout<UInt16>.stride &- 1)
-  let end = unsafe ptr + count
-
-  while unsafe ptr < end128 {
-    let pair = unsafe ptr.loadUnaligned(as: (UInt64, UInt64).self)
-    let result = (pair.0 | pair.1) & asciiMask64
-    guard result == 0 else { return false }
-    unsafe ptr = unsafe ptr + MemoryLayout<(UInt64, UInt64)>.stride
+  //--------------- Implementation building blocks ---------------------------//
+#if arch(arm64_32)
+  typealias Word = UInt64
+#else
+  typealias Word = UInt
+#endif
+  let mask = Word(truncatingIfNeeded: 0x80808080_80808080 as UInt64)
+
+#if (arch(i386) || arch(x86_64)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
+  // TODO: Should consider AVX2 / AVX512 / AVX10 path here
+  typealias Block = (SIMD16<UInt8>, SIMD16<UInt8>)
+  @_transparent func pmovmskb(_ vec: SIMD16<UInt8>) -> UInt16 {
+    UInt16(Builtin.bitcast_Vec16xInt1_Int16(
+      Builtin.cmp_slt_Vec16xInt8(vec._storage._value, Builtin.zeroInitializer())
+    ))
+  }
+#elseif (arch(arm64) || arch(arm64_32)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
+  typealias Block = (SIMD16<UInt8>, SIMD16<UInt8>)
+  @_transparent func umaxv(_ vec: SIMD16<UInt8>) -> UInt8 {
+    UInt8(Builtin.int_vector_reduce_umax_Vec16xInt8(vec._storage._value))
   }
+#else
+  typealias Block = (Word, Word, Word, Word)
+#endif
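The helpers above lean on stdlib-internal Builtins. As a rough user-land sketch of the same two predicates (illustrative helpers, not part of the commit; the public SIMD max() reduction stands in for umaxv/pmovmskb):

// A byte is non-ASCII exactly when its high bit (0x80) is set, so AND-ing a
// whole word against the repeated-0x80 mask tests eight bytes at once.
func wordIsASCII(_ word: UInt64) -> Bool {
  word & 0x8080_8080_8080_8080 == 0
}

// The horizontal max of 16 bytes is below 0x80 iff every byte is ASCII; this
// is the role umaxv plays above (pmovmskb == 0 is the x86 spelling).
func blockIsASCII(_ block: SIMD16<UInt8>) -> Bool {
  block.max() < 0x80
}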
 
-  // If we had enough bytes for two iterations of this, we would have hit
-  // the loop above, so we only need to do this once
-  if unsafe ptr < end64 {
-    let value = unsafe ptr.loadUnaligned(as: UInt64.self)
-    guard value & asciiMask64 == 0 else { return false }
-    unsafe ptr = unsafe ptr + MemoryLayout<UInt64>.stride
+  @_transparent
+  func allASCII(wordAt pointer: UnsafePointer<UInt8>) -> Bool {
+    let word = unsafe UnsafeRawPointer(pointer).loadUnaligned(as: Word.self)
+    return word & mask == 0
   }
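As a concrete illustration of the word test (not from the commit; loadUnaligned requires Swift 5.7 or later):

let hello: [UInt8] = Array("hello, w".utf8)  // exactly eight ASCII bytes
hello.withUnsafeBytes { buf in
  // Mirrors the unaligned Word load in allASCII(wordAt:) above.
  let word = buf.loadUnaligned(as: UInt64.self)
  assert(word & 0x8080_8080_8080_8080 == 0)  // every high bit is clear
}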
 
-  if unsafe ptr < end32 {
-    let value = unsafe ptr.loadUnaligned(as: UInt32.self)
-    guard value & asciiMask32 == 0 else { return false }
-    unsafe ptr = unsafe ptr + MemoryLayout<UInt32>.stride
+  @_transparent
+  func allASCII(blockAt pointer: UnsafePointer<UInt8>) -> Bool {
+    let block = unsafe UnsafeRawPointer(pointer).loadUnaligned(as: Block.self)
+#if (arch(i386) || arch(x86_64)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
+    return pmovmskb(block.0 | block.1) == 0
+#elseif (arch(arm64) || arch(arm64_32)) && SWIFT_STDLIB_ENABLE_VECTOR_TYPES
+    return umaxv(block.0 | block.1) < 0x80
+#else
+    return (block.0 | block.1 | block.2 | block.3) & mask == 0
+#endif
+  }
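All three conditional branches answer the same question: does any byte of the block have its high bit set? An illustrative sanity check, using only public SIMD API rather than the Builtins, that the mask form and the max-reduction form agree:

var block = SIMD16<UInt8>(repeating: 0x41)  // sixteen ASCII 'A's
assert((block & SIMD16(repeating: 0x80)) == SIMD16(repeating: 0))
assert(block.max() < 0x80)
block[7] = 0xC3                             // inject a UTF-8 lead byte
assert((block & SIMD16(repeating: 0x80)) != SIMD16(repeating: 0))
assert(block.max() >= 0x80)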
+  //----------------------- Implementation proper ----------------------------//
+  guard input.count >= MemoryLayout<Word>.size else {
+    // They gave us a region of memory
+    // whose size is as modest as it can be.
+    // We'll check every byte
+    // for the bit of most height
+    // and return if we happen on any
+    //
+    // I'm sorry, I'm sorry, I'm trying to delete it. (This chunk of code, not
+    // the Limerick. I would wager that--at least for Strings--we could
+    // unconditionally load 16B here,¹ because of the small string encoding,
+    // and check them all at once, which would be much more efficient. That
+    // probably has to happen by lifting this check into the SmallString
+    // initializer directly, though.)
+    //
+    // ¹ well, most of the time, which makes it a rather conditional
+    //   "unconditionally".
+    return unsafe input.allSatisfy { $0 < 0x80 }
   }
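The parenthetical above wagers that small strings could be checked with a single unconditional 16-byte load. A sketch of that idea under its stated assumptions (hypothetical helper, not stdlib API: the caller must guarantee 16 readable bytes, zero-padded past the end of the string, as the small-string encoding provides):

func smallStringIsASCII(_ raw: UnsafeRawPointer) -> Bool {
  // One unconditional 16-byte load; the zero padding bytes have their high
  // bits clear, so they can never make an ASCII string look non-ASCII.
  let block = raw.loadUnaligned(as: SIMD16<UInt8>.self)
  return block.max() < 0x80
}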
 
-  if unsafe ptr < end16 {
-    let value = unsafe ptr.loadUnaligned(as: UInt16.self)
-    guard value & asciiMask16 == 0 else { return false }
-    unsafe ptr = unsafe ptr + MemoryLayout<UInt16>.stride
+  // input.count is non-zero, so we can unconditionally unwrap baseAddress.
+  let base = unsafe input.baseAddress._unsafelyUnwrappedUnchecked
+  let n = input.count
+  var i = 0
+
+  guard n >= MemoryLayout<Block>.size else {
+    // The size isn't yet to a block
+    // word-by-word we are forced to walk.
+    // So as to not leave a gap
+    // the last word may lap
+    // the word that we already chalked.
+    //
+    //  0      k      2k     3k    ?k     n-k    n-1
+    //  |      |      |      |      |      |      |
+    //  +------+------+------+      +------+      |
+    //  | word | word | word |  ... | word |      |
+    //  +------+------+------+      +------+      v
+    //                                     +------+
+    //  possibly overlapping final word >  | word |
+    //                                     +------+
+    //
+    // This means that we check any bytes in the overlap region twice, but
+    // that's much preferable to using smaller accesses to avoid rechecking,
+    // because checking the entire last word is about as expensive as checking
+    // just one byte would be, and on average there's more than one byte
+    // remaining.
+    //
+    // Note that we don't bother trying to align any of these accesses,
+    // because there is minimal benefit to doing so on "modern" OoO cores,
+    // which can handle cacheline-crossing loads at full speed. If the string
+    // happens to be aligned, they'll be aligned; if not, they won't be. It
+    // will likely make sense to add a path that does align everything for
+    // more limited embedded CPUs, though.
+    let k = MemoryLayout<Word>.size
+    let last = n &- k
+    while i < last {
+      guard unsafe allASCII(wordAt: base + i) else { return false }
+      i &+= k
+    }
+    return unsafe allASCII(wordAt: base + last)
   }
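The overlapping-final-word pattern in the diagram applies to any chunked scan. A self-contained sketch over a raw buffer of at least one word (illustrative names, not part of the commit):

func asciiByWords(_ buf: UnsafeRawBufferPointer) -> Bool {
  let k = MemoryLayout<UInt64>.size
  precondition(buf.count >= k)
  let mask: UInt64 = 0x8080_8080_8080_8080
  let last = buf.count - k
  var i = 0
  while i < last {
    guard buf.loadUnaligned(fromByteOffset: i, as: UInt64.self) & mask == 0
    else { return false }
    i += k
  }
  // Final, possibly overlapping word: bytes in the overlap get checked twice,
  // which is harmless for a pure predicate.
  return buf.loadUnaligned(fromByteOffset: last, as: UInt64.self) & mask == 0
}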
-
-  if unsafe ptr < end {
-    let value = unsafe ptr.loadUnaligned(fromByteOffset: 0, as: UInt8.self)
-    guard value & asciiMask8 == 0 else { return false }
+
+  // Check block-by-block, with a possibly overlapping last block to avoid
+  // sub-block cleanup. We should be able to avoid manual index arithmetic
+  // and write this loop and the one above as something like the following:
+  //
+  //   return stride(from: 0, to: last, by: k).allSatisfy {
+  //     allASCII(blockAt: base + $0)
+  //   } && allASCII(blockAt: base + last)
+  //
+  // but LLVM leaves one unnecessary conditional operation in the loop when
+  // we do that, so we write them out as while loops instead for now.
+  let k = MemoryLayout<Block>.size
+  let last = n &- k
+  while i < last {
+    guard unsafe allASCII(blockAt: base + i) else { return false }
+    i &+= k
   }
-  unsafe _internalInvariant(ptr == end || ptr + 1 == end)
-  return true
+  return unsafe allASCII(blockAt: base + last)
 }
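For reference, the declarative shape the comment above wishes for, reconstructed against an illustrative user-land helper rather than the stdlib internals; per the commit's own note, LLVM currently leaves a redundant conditional in the loop when given this form:

func asciiByBlocks(_ buf: UnsafeRawBufferPointer) -> Bool {
  let k = MemoryLayout<SIMD16<UInt8>>.size
  precondition(buf.count >= k)
  // Illustrative stand-in for the commit's allASCII(blockAt:).
  func blockIsASCII(at i: Int) -> Bool {
    buf.loadUnaligned(fromByteOffset: i, as: SIMD16<UInt8>.self).max() < 0x80
  }
  let last = buf.count - k
  return stride(from: 0, to: last, by: k).allSatisfy(blockIsASCII(at:))
      && blockIsASCII(at: last)
}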
 
 extension String {