Skip to content

Commit d073d80

Browse files
Abseil Team and copybara-github
authored and committed
Performance improvement for absl::AsciiStrToUpper() and absl::AsciiStrToLower()
PiperOrigin-RevId: 608661989
Change-Id: Ibfd94f8b2d23fd232bf93904ed68e11a400b3644
1 parent a7012a5 commit d073d80

File tree

1 file changed

+8
-63
lines changed

1 file changed

+8
-63
lines changed

absl/strings/ascii.cc

Lines changed: 8 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,8 @@
1515
#include "absl/strings/ascii.h"
1616

1717
#include <climits>
18-
#include <cstdint>
1918
#include <cstring>
2019
#include <string>
21-
#include <type_traits>
2220

2321
#include "absl/base/config.h"
2422
#include "absl/base/nullability.h"
@@ -162,19 +160,6 @@ ABSL_DLL const char kToUpper[256] = {
162160
};
163161
// clang-format on
164162

165-
template <class T>
166-
static constexpr T BroadcastByte(unsigned char value) {
167-
static_assert(std::is_integral<T>::value && sizeof(T) <= sizeof(uint64_t) &&
168-
std::is_unsigned<T>::value,
169-
"only unsigned integers up to 64-bit allowed");
170-
T result = value;
171-
constexpr size_t result_bit_width = sizeof(result) * CHAR_BIT;
172-
result |= result << ((CHAR_BIT << 0) & (result_bit_width - 1));
173-
result |= result << ((CHAR_BIT << 1) & (result_bit_width - 1));
174-
result |= result << ((CHAR_BIT << 2) & (result_bit_width - 1));
175-
return result;
176-
}
177-
178163
// Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`).
179164
// Implemented by:
180165
// 1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26).
@@ -190,64 +175,24 @@ constexpr bool AsciiInAZRange(unsigned char c) {
190175
}
191176

192177
template <bool ToUpper>
193-
static constexpr char* PartialAsciiStrCaseFold(absl::Nonnull<char*> p,
194-
absl::Nonnull<char*> end) {
195-
using vec_t = size_t;
196-
const size_t n = static_cast<size_t>(end - p);
197-
198-
// SWAR algorithm: http://0x80.pl/notesen/2016-01-06-swar-swap-case.html
199-
constexpr char ch_a = ToUpper ? 'a' : 'A', ch_z = ToUpper ? 'z' : 'Z';
200-
char* const swar_end = p + (n / sizeof(vec_t)) * sizeof(vec_t);
201-
while (p < swar_end) {
202-
vec_t v = vec_t();
203-
204-
// memcpy the vector, but constexpr
205-
for (size_t i = 0; i < sizeof(vec_t); ++i) {
206-
v |= static_cast<vec_t>(static_cast<unsigned char>(p[i]))
207-
<< (i * CHAR_BIT);
208-
}
209-
210-
constexpr unsigned int msb = 1u << (CHAR_BIT - 1);
211-
const vec_t v_msb = v & BroadcastByte<vec_t>(msb);
212-
const vec_t v_nonascii_mask = (v_msb << 1) - (v_msb >> (CHAR_BIT - 1));
213-
const vec_t v_nonascii = v & v_nonascii_mask;
214-
const vec_t v_ascii = v & ~v_nonascii_mask;
215-
const vec_t a = v_ascii + BroadcastByte<vec_t>(msb - ch_a - 0),
216-
z = v_ascii + BroadcastByte<vec_t>(msb - ch_z - 1);
217-
v = v_nonascii | (v_ascii ^ ((a ^ z) & BroadcastByte<vec_t>(msb)) >> 2);
218-
219-
// memcpy the vector, but constexpr
220-
for (size_t i = 0; i < sizeof(vec_t); ++i) {
221-
p[i] = static_cast<char>(v >> (i * CHAR_BIT));
222-
}
223-
224-
p += sizeof(v);
225-
}
226-
227-
return p;
228-
}
229-
230-
template <bool ToUpper>
231-
static constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
232-
absl::Nonnull<char*> end) {
178+
constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p,
179+
absl::Nonnull<char*> end) {
233180
// The upper- and lowercase versions of ASCII characters differ by only 1 bit.
234181
// When we need to flip the case, we can xor with this bit to achieve the
235182
// desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
236183
// could have chosen 'z' and 'Z', or any other pair of characters as they all
237184
// have the same single bit difference.
238185
constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
239186

240-
using vec_t = size_t;
241-
// TODO(b/316380338): When FDO becomes able to vectorize these,
242-
// revert this manual optimization and just leave the naive loop.
243-
if (static_cast<size_t>(end - p) >= sizeof(vec_t)) {
244-
p = ascii_internal::PartialAsciiStrCaseFold<ToUpper>(p, end);
245-
}
246-
while (p < end) {
187+
#ifdef __clang__
188+
// Temporary workaround until the mentioned bug is fixed.
189+
// NOLINTNEXTLINE(whitespace/line_length)
190+
#pragma clang loop vectorize(enable)
191+
#endif
192+
for (; p < end; ++p) {
247193
unsigned char v = static_cast<unsigned char>(*p);
248194
v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0;
249195
*p = static_cast<char>(v);
250-
++p;
251196
}
252197
}
253198

0 commit comments

Comments
 (0)