1515#include " absl/strings/ascii.h"
1616
1717#include < climits>
18+ #include < cstdint>
1819#include < cstring>
1920#include < string>
21+ #include < type_traits>
2022
2123#include " absl/base/config.h"
2224#include " absl/base/nullability.h"
@@ -160,6 +162,19 @@ ABSL_DLL const char kToUpper[256] = {
160162};
161163// clang-format on
162164
// Returns a value of type `T` in which every byte is a copy of `value`,
// e.g. `BroadcastByte<uint32_t>(0x80) == 0x80808080`.
//
// `T` must be an unsigned integer type that is at most 64 bits wide.
template <class T>
static constexpr T BroadcastByte(unsigned char value) {
  static_assert(std::is_integral<T>::value && sizeof(T) <= sizeof(uint64_t) &&
                    std::is_unsigned<T>::value,
                "only unsigned integers up to 64-bit allowed");
  T result = value;
  constexpr size_t result_bit_width = sizeof(result) * CHAR_BIT;
  // Each step doubles the number of populated bytes (1 -> 2 -> 4 -> 8).
  // The shift count is masked with `result_bit_width - 1` so that a step
  // which would shift by the full width of `T` or more turns into a shift by
  // zero (a harmless self-OR) instead of an out-of-range shift.
  // NOTE(review): the masking presumes the bit width of `T` is a power of two.
  result |= result << ((CHAR_BIT << 0) & (result_bit_width - 1));
  result |= result << ((CHAR_BIT << 1) & (result_bit_width - 1));
  result |= result << ((CHAR_BIT << 2) & (result_bit_width - 1));
  return result;
}
177+
// Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`).
// Implemented by:
//  1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26).
@@ -175,24 +190,64 @@ constexpr bool AsciiInAZRange(unsigned char c) {
175190}
176191
177192template <bool ToUpper>
178- constexpr void AsciiStrCaseFold (absl::Nonnull<char *> p,
179- absl::Nonnull<char *> end) {
193+ static constexpr char * PartialAsciiStrCaseFold (absl::Nonnull<char *> p,
194+ absl::Nonnull<char *> end) {
195+ using vec_t = size_t ;
196+ const size_t n = static_cast <size_t >(end - p);
197+
198+ // SWAR algorithm: http://0x80.pl/notesen/2016-01-06-swar-swap-case.html
199+ constexpr char ch_a = ToUpper ? ' a' : ' A' , ch_z = ToUpper ? ' z' : ' Z' ;
200+ char * const swar_end = p + (n / sizeof (vec_t )) * sizeof (vec_t );
201+ while (p < swar_end) {
202+ vec_t v = vec_t ();
203+
204+ // memcpy the vector, but constexpr
205+ for (size_t i = 0 ; i < sizeof (vec_t ); ++i) {
206+ v |= static_cast <vec_t >(static_cast <unsigned char >(p[i]))
207+ << (i * CHAR_BIT);
208+ }
209+
210+ constexpr unsigned int msb = 1u << (CHAR_BIT - 1 );
211+ const vec_t v_msb = v & BroadcastByte<vec_t >(msb);
212+ const vec_t v_nonascii_mask = (v_msb << 1 ) - (v_msb >> (CHAR_BIT - 1 ));
213+ const vec_t v_nonascii = v & v_nonascii_mask;
214+ const vec_t v_ascii = v & ~v_nonascii_mask;
215+ const vec_t a = v_ascii + BroadcastByte<vec_t >(msb - ch_a - 0 ),
216+ z = v_ascii + BroadcastByte<vec_t >(msb - ch_z - 1 );
217+ v = v_nonascii | (v_ascii ^ ((a ^ z) & BroadcastByte<vec_t >(msb)) >> 2 );
218+
219+ // memcpy the vector, but constexpr
220+ for (size_t i = 0 ; i < sizeof (vec_t ); ++i) {
221+ p[i] = static_cast <char >(v >> (i * CHAR_BIT));
222+ }
223+
224+ p += sizeof (v);
225+ }
226+
227+ return p;
228+ }
229+
230+ template <bool ToUpper>
231+ static constexpr void AsciiStrCaseFold (absl::Nonnull<char *> p,
232+ absl::Nonnull<char *> end) {
180233 // The upper- and lowercase versions of ASCII characters differ by only 1 bit.
181234 // When we need to flip the case, we can xor with this bit to achieve the
182235 // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
183236 // could have chosen 'z' and 'Z', or any other pair of characters as they all
184237 // have the same single bit difference.
185238 constexpr unsigned char kAsciiCaseBitFlip = ' a' ^ ' A' ;
186239
187- #ifdef __clang__
188- // Temporary workaround until the mentioned bug is fixed.
189- // NOLINTNEXTLINE(whitespace/line_length)
190- #pragma clang loop vectorize(enable)
191- #endif
192- for (; p < end; ++p) {
240+ using vec_t = size_t ;
241+ // TODO(b/316380338): When FDO becomes able to vectorize these,
242+ // revert this manual optimization and just leave the naive loop.
243+ if (static_cast <size_t >(end - p) >= sizeof (vec_t )) {
244+ p = ascii_internal::PartialAsciiStrCaseFold<ToUpper>(p, end);
245+ }
246+ while (p < end) {
193247 unsigned char v = static_cast <unsigned char >(*p);
194248 v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0 ;
195249 *p = static_cast <char >(v);
250+ ++p;
196251 }
197252}
198253
0 commit comments