2626#include " absl/base/prefetch.h"
2727#include " absl/hash/internal/city.h"
2828
29-
3029#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
3130#error ABSL_AES_INTERNAL_HAVE_X86_SIMD cannot be directly set
3231#elif defined(__SSE4_2__) && defined(__AES__)
@@ -46,18 +45,20 @@ namespace hash_internal {
4645
4746namespace {
4847
49- uint64_t Mix32Bytes (const uint8_t * ptr, uint64_t current_state) {
50- uint64_t a = absl::base_internal::UnalignedLoad64 (ptr);
51- uint64_t b = absl::base_internal::UnalignedLoad64 (ptr + 8 );
52- uint64_t c = absl::base_internal::UnalignedLoad64 (ptr + 16 );
53- uint64_t d = absl::base_internal::UnalignedLoad64 (ptr + 24 );
54-
55- uint64_t cs0 = Mix (a ^ kStaticRandomData [1 ], b ^ current_state);
56- uint64_t cs1 = Mix (c ^ kStaticRandomData [2 ], d ^ current_state);
57- return cs0 ^ cs1;
48+ void PrefetchFutureDataToLocalCache (const uint8_t * ptr) {
49+ PrefetchToLocalCache (ptr + 5 * ABSL_CACHELINE_SIZE);
5850}
5951
6052#ifdef ABSL_AES_INTERNAL_HAVE_X86_SIMD
// Collapses four 128-bit states into one 64-bit value.
// res128 = encrypt(a + c, d) + decrypt(b - d, a)
// i.e. one AES encryption round on (a + c) keyed by d, added to one AES
// decryption round on (b - d) keyed by a; the two 64-bit halves of the
// resulting vector are then XORed together.
// (NOTE(review): the prior comment said "decrypt(a + c, d) + decrypt(b + d, a)",
// which did not match the aesenc/sub instructions actually used.)
uint64_t Mix4x16Vectors(__m128i a, __m128i b, __m128i c, __m128i d) {
  auto res128 = _mm_add_epi64(_mm_aesenc_si128(_mm_add_epi64(a, c), d),
                              _mm_aesdec_si128(_mm_sub_epi64(b, d), a));
  // Extract low and high 64-bit lanes and fold them together.
  auto x64 = static_cast<uint64_t>(_mm_cvtsi128_si64(res128));
  auto y64 = static_cast<uint64_t>(_mm_extract_epi64(res128, 1));
  return x64 ^ y64;
}
61+
6162uint64_t LowLevelHash33To64 (uint64_t seed, const uint8_t * ptr, size_t len) {
6263 assert (len > 32 );
6364 assert (len <= 64 );
@@ -84,21 +85,89 @@ uint64_t LowLevelHash33To64(uint64_t seed, const uint8_t* ptr, size_t len) {
8485
8586 // We perform another round of encryption to mix bits between two halves of
8687 // the input.
87- auto res128 = _mm_add_epi64 (_mm_aesenc_si128 (_mm_add_epi64 (na, nc), nd),
88- _mm_aesdec_si128 (_mm_sub_epi64 (nb, nd), na));
89- auto x64 = static_cast <uint64_t >(_mm_cvtsi128_si64 (res128));
90- auto y64 = static_cast <uint64_t >(_mm_extract_epi64 (res128, 1 ));
91- return x64 ^ y64;
88+ return Mix4x16Vectors (na, nb, nc, nd);
89+ }
90+
// Hashes an input of more than 64 bytes (asserted) into a 64-bit value using
// AES-round-based mixing. `seed` perturbs the initial state; `data`/`len`
// describe the input buffer. The SIMD counterpart of the scalar
// LowLevelHashLenGt64 below.
[[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t
LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) {
  assert(len > 64);
  const uint8_t* ptr = static_cast<const uint8_t*>(data);
  // Points at the final 32 bytes of the input; captured now because `ptr`
  // and `len` are mutated by the main loop.
  const uint8_t* last_32_ptr = ptr + len - 32;

  // If we have more than 64 bytes, we're going to handle chunks of 64
  // bytes at a time. We're going to build up four separate hash states
  // which we will then hash together. This avoids short dependency chains.
  __m128i state0 =
      _mm_set_epi64x(static_cast<int64_t>(seed), static_cast<int64_t>(len));
  __m128i state1 = state0;
  __m128i state2 = state1;
  __m128i state3 = state2;

  // Mixing two 128-bit vectors at a time with corresponding states.
  // All variables are mixed slightly differently to avoid hash collision
  // due to trivial byte rotation.
  // We combine state and data with _mm_add_epi64/_mm_sub_epi64 before applying
  // AES encryption to make hash function dependent on the order of the blocks.
  // See comments in LowLevelHash33To64 for more considerations.
  auto mix_ab = [&state0,
                 &state1](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE {
    // i128 a = *p;
    // i128 b = *(p + 16);
    // state0 = decrypt(state0 + a, state0);
    // state1 = decrypt(state1 - b, state1);
    auto a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
    auto b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p + 16));
    state0 = _mm_aesdec_si128(_mm_add_epi64(state0, a), state0);
    state1 = _mm_aesdec_si128(_mm_sub_epi64(state1, b), state1);
  };
  auto mix_cd = [&state2,
                 &state3](const uint8_t* p) ABSL_ATTRIBUTE_ALWAYS_INLINE {
    // i128 c = *p;
    // i128 d = *(p + 16);
    // state2 = encrypt(state2 + c, state2);
    // state3 = encrypt(state3 - d, state3);
    auto c = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
    auto d = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p + 16));
    state2 = _mm_aesenc_si128(_mm_add_epi64(state2, c), state2);
    state3 = _mm_aesenc_si128(_mm_sub_epi64(state3, d), state3);
  };

  // Main loop: consume 64 bytes per iteration while strictly more than 64
  // bytes remain, so the tail handling below always has 1..64 bytes left.
  do {
    PrefetchFutureDataToLocalCache(ptr);
    mix_ab(ptr);
    mix_cd(ptr + 32);

    ptr += 64;
    len -= 64;
  } while (len > 64);

  // At most 64 bytes remain at `ptr`. If more than 32 remain, mix the next
  // 32 bytes through states 0/1; then always mix the final 32 bytes of the
  // original input. That final window may overlap bytes already mixed (when
  // the remainder is not exactly 64), so some bytes can be processed twice,
  // but every tail byte is guaranteed to be included.
  if (len > 32) {
    mix_ab(ptr);
  }
  mix_cd(last_32_ptr);

  // Fold the four independent states into the final 64-bit hash.
  return Mix4x16Vectors(state0, state1, state2, state3);
}
93152#else
153+ uint64_t Mix32Bytes (const uint8_t * ptr, uint64_t current_state) {
154+ uint64_t a = absl::base_internal::UnalignedLoad64 (ptr);
155+ uint64_t b = absl::base_internal::UnalignedLoad64 (ptr + 8 );
156+ uint64_t c = absl::base_internal::UnalignedLoad64 (ptr + 16 );
157+ uint64_t d = absl::base_internal::UnalignedLoad64 (ptr + 24 );
158+
159+ uint64_t cs0 = Mix (a ^ kStaticRandomData [1 ], b ^ current_state);
160+ uint64_t cs1 = Mix (c ^ kStaticRandomData [2 ], d ^ current_state);
161+ return cs0 ^ cs1;
162+ }
163+
94164uint64_t LowLevelHash33To64 (uint64_t seed, const uint8_t * ptr, size_t len) {
95165 assert (len > 32 );
96166 assert (len <= 64 );
97167 uint64_t current_state = seed ^ kStaticRandomData [0 ] ^ len;
98168 const uint8_t * last_32_ptr = ptr + len - 32 ;
99169 return Mix32Bytes (last_32_ptr, Mix32Bytes (ptr, current_state));
100170}
101- #endif // ABSL_AES_INTERNAL_HAVE_X86_SIMD
102171
103172[[maybe_unused]] ABSL_ATTRIBUTE_NOINLINE uint64_t
104173LowLevelHashLenGt64 (uint64_t seed, const void * data, size_t len) {
@@ -114,7 +183,7 @@ LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) {
114183 uint64_t duplicated_state2 = current_state;
115184
116185 do {
117- PrefetchToLocalCache (ptr + 5 * ABSL_CACHELINE_SIZE );
186+ PrefetchFutureDataToLocalCache (ptr);
118187
119188 uint64_t a = absl::base_internal::UnalignedLoad64 (ptr);
120189 uint64_t b = absl::base_internal::UnalignedLoad64 (ptr + 8 );
@@ -148,6 +217,7 @@ LowLevelHashLenGt64(uint64_t seed, const void* data, size_t len) {
148217 // safely read from `ptr + len - 32`.
149218 return Mix32Bytes (last_32_ptr, current_state);
150219}
220+ #endif // ABSL_AES_INTERNAL_HAVE_X86_SIMD
151221
152222[[maybe_unused]] uint64_t LowLevelHashLenGt32 (uint64_t seed, const void * data,
153223 size_t len) {
0 commit comments