Skip to content

Commit b40a368

Browse files
authored
perf: Optimize hash performance by avoiding allocating hash state object (#5469)
We currently call `XXH3_createState` and `XXH3_freeState` every time an object is hashed. Both functions go through `malloc` and `free`, which can hurt performance. This change avoids the streaming API as much as possible by accumulating input in an internal buffer.
1 parent 86ef16d commit b40a368

File tree

2 files changed

+330
-20
lines changed

2 files changed

+330
-20
lines changed

include/xrpl/beast/hash/xxhasher.h

Lines changed: 98 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -24,32 +24,110 @@
2424

2525
#include <xxhash.h>
2626

27+
#include <array>
2728
#include <cstddef>
28-
#include <new>
29-
#include <type_traits>
29+
#include <cstdint>
30+
#include <optional>
31+
#include <span>
3032

3133
namespace beast {
3234

3335
class xxhasher
3436
{
37+
public:
38+
using result_type = std::size_t;
39+
3540
private:
36-
// requires 64-bit std::size_t
37-
static_assert(sizeof(std::size_t) == 8, "");
41+
static_assert(sizeof(std::size_t) == 8, "requires 64-bit std::size_t");
42+
// Have an internal buffer to avoid the streaming API
43+
// A 64-byte buffer should be big enough for us
44+
static constexpr std::size_t INTERNAL_BUFFER_SIZE = 64;
3845

39-
XXH3_state_t* state_;
46+
alignas(64) std::array<std::uint8_t, INTERNAL_BUFFER_SIZE> buffer_;
47+
std::span<std::uint8_t> readBuffer_;
48+
std::span<std::uint8_t> writeBuffer_;
49+
50+
std::optional<XXH64_hash_t> seed_;
51+
XXH3_state_t* state_ = nullptr;
52+
53+
void
54+
resetBuffers()
55+
{
56+
writeBuffer_ = std::span{buffer_};
57+
readBuffer_ = {};
58+
}
59+
60+
void
61+
updateHash(void const* data, std::size_t len)
62+
{
63+
if (writeBuffer_.size() < len)
64+
{
65+
flushToState(data, len);
66+
}
67+
else
68+
{
69+
std::memcpy(writeBuffer_.data(), data, len);
70+
writeBuffer_ = writeBuffer_.subspan(len);
71+
readBuffer_ = std::span{
72+
std::begin(buffer_), buffer_.size() - writeBuffer_.size()};
73+
}
74+
}
4075

4176
static XXH3_state_t*
4277
allocState()
4378
{
4479
auto ret = XXH3_createState();
4580
if (ret == nullptr)
46-
throw std::bad_alloc();
81+
throw std::bad_alloc(); // LCOV_EXCL_LINE
4782
return ret;
4883
}
4984

50-
public:
51-
using result_type = std::size_t;
85+
void
86+
flushToState(void const* data, std::size_t len)
87+
{
88+
if (!state_)
89+
{
90+
state_ = allocState();
91+
if (seed_.has_value())
92+
{
93+
XXH3_64bits_reset_withSeed(state_, *seed_);
94+
}
95+
else
96+
{
97+
XXH3_64bits_reset(state_);
98+
}
99+
}
100+
XXH3_64bits_update(state_, readBuffer_.data(), readBuffer_.size());
101+
resetBuffers();
102+
if (data && len)
103+
{
104+
XXH3_64bits_update(state_, data, len);
105+
}
106+
}
52107

108+
/// Produce the hash of everything fed so far.
///
/// If no streaming state was ever created, all input is still in the
/// internal buffer and is hashed with a single one-shot call (seeded when a
/// seed was supplied). Otherwise the remaining buffered bytes are flushed
/// into the streaming state and its digest is returned.
result_type
retrieveHash()
{
    if (state_ == nullptr)
    {
        // One-shot path: no streaming state was needed.
        auto const* bytes = readBuffer_.data();
        auto const count = readBuffer_.size();
        if (seed_.has_value())
            return XXH3_64bits_withSeed(bytes, count, *seed_);
        return XXH3_64bits(bytes, count);
    }

    // Streaming path: drain whatever is still buffered, then finalize.
    flushToState(nullptr, 0);
    return XXH3_64bits_digest(state_);
}
129+
130+
public:
53131
static constexpr auto const endian = boost::endian::order::native;
54132

55133
xxhasher(xxhasher const&) = delete;
@@ -58,43 +136,43 @@ class xxhasher
58136

59137
xxhasher()
60138
{
61-
state_ = allocState();
62-
XXH3_64bits_reset(state_);
139+
resetBuffers();
63140
}
64141

65142
~xxhasher() noexcept
66143
{
67-
XXH3_freeState(state_);
144+
if (state_)
145+
{
146+
XXH3_freeState(state_);
147+
}
68148
}
69149

70150
template <
71151
class Seed,
72152
std::enable_if_t<std::is_unsigned<Seed>::value>* = nullptr>
73-
explicit xxhasher(Seed seed)
153+
explicit xxhasher(Seed seed) : seed_(seed)
74154
{
75-
state_ = allocState();
76-
XXH3_64bits_reset_withSeed(state_, seed);
155+
resetBuffers();
77156
}
78157

79158
template <
80159
class Seed,
81160
std::enable_if_t<std::is_unsigned<Seed>::value>* = nullptr>
82-
xxhasher(Seed seed, Seed)
161+
xxhasher(Seed seed, Seed) : seed_(seed)
83162
{
84-
state_ = allocState();
85-
XXH3_64bits_reset_withSeed(state_, seed);
163+
resetBuffers();
86164
}
87165

88166
void
89167
operator()(void const* key, std::size_t len) noexcept
90168
{
91-
XXH3_64bits_update(state_, key, len);
169+
updateHash(key, len);
92170
}
93171

94172
explicit
95-
operator std::size_t() noexcept
173+
operator result_type() noexcept
96174
{
97-
return XXH3_64bits_digest(state_);
175+
return retrieveHash();
98176
}
99177
};
100178

0 commit comments

Comments
 (0)