Skip to content

Commit 530de74

Browse files
Copilotjll63
andcommitted
Add minimal_perfect_hash policy and test suite
Co-authored-by: jll63 <[email protected]>
1 parent b420157 commit 530de74

File tree

2 files changed

+542
-0
lines changed

2 files changed

+542
-0
lines changed
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
// Copyright (c) 2018-2025 Jean-Louis Leroy
2+
// Distributed under the Boost Software License, Version 1.0.
3+
// See accompanying file LICENSE_1_0.txt
4+
// or copy at http://www.boost.org/LICENSE_1_0.txt)
5+
6+
#ifndef BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP
7+
#define BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP
8+
9+
#include <boost/openmethod/preamble.hpp>
10+
11+
#include <limits>
12+
#include <random>
13+
#ifdef _MSC_VER
14+
#pragma warning(push)
15+
#pragma warning(disable : 4702) // unreachable code
16+
#endif
17+
18+
namespace boost::openmethod {
19+
20+
namespace detail {
21+
22+
template<class Registry>
23+
std::vector<type_id> minimal_perfect_hash_control;
24+
25+
} // namespace detail
26+
27+
namespace policies {
28+
29+
//! Hash type ids using a minimal perfect hash function.
30+
//!
31+
//! `minimal_perfect_hash` implements the @ref type_hash policy using a hash
32+
//! function in the form `H(x)=(M*x)>>N`. It uses the PtHash algorithm to
33+
//! determine values for `M` and `N` that result in a minimal perfect hash
34+
//! function for the set of registered type_ids. This means that the hash
35+
//! function is collision-free and the codomain is exactly the size of the
36+
//! domain, resulting in a dense range [0, n-1] for n inputs.
37+
struct minimal_perfect_hash : type_hash {
38+
39+
//! Cannot find hash factors
40+
struct search_error : openmethod_error {
41+
//! Number of attempts to find hash factors
42+
std::size_t attempts;
43+
//! Number of buckets used in the last attempt
44+
std::size_t buckets;
45+
46+
//! Write a short description to an output stream
47+
//! @param os The output stream
48+
//! @tparam Registry The registry
49+
//! @tparam Stream A @ref LightweightOutputStream
50+
template<class Registry, class Stream>
51+
auto write(Stream& os) const -> void;
52+
};
53+
54+
using errors = std::variant<search_error>;
55+
56+
//! A TypeHashFn metafunction.
57+
//!
58+
//! @tparam Registry The registry containing this policy
59+
template<class Registry>
60+
class fn {
61+
static std::size_t mult;
62+
static std::size_t shift;
63+
static std::size_t min_value;
64+
static std::size_t max_value;
65+
66+
static void check(std::size_t index, type_id type);
67+
68+
template<class InitializeContext, class... Options>
69+
static void initialize(
70+
const InitializeContext& ctx, std::vector<type_id>& buckets,
71+
const std::tuple<Options...>& options);
72+
73+
public:
74+
//! Find the hash factors
75+
//!
76+
//! Attempts to find suitable values for the multiplication factor `M`
77+
//! and the shift amount `N` that result in a minimal perfect hash
78+
//! function for the specified input values.
79+
//!
80+
//! If no suitable values are found, calls the error handler with
81+
//! a @ref hash_error object then calls `abort`.
82+
//!
83+
//! @tparam Context An @ref InitializeContext.
84+
//! @param ctx A Context object.
85+
//! @return A pair containing the minimum and maximum hash values.
86+
template<class Context, class... Options>
87+
static auto
88+
initialize(const Context& ctx, const std::tuple<Options...>& options) {
89+
if constexpr (Registry::has_runtime_checks) {
90+
initialize(
91+
ctx, detail::minimal_perfect_hash_control<Registry>, options);
92+
} else {
93+
std::vector<type_id> buckets;
94+
initialize(ctx, buckets, options);
95+
}
96+
97+
return std::pair{min_value, max_value};
98+
}
99+
100+
//! Hash a type id
101+
//!
102+
//! Hash a type id.
103+
//!
104+
//! If `Registry` contains the @ref runtime_checks policy, checks that
105+
//! the type id is valid, i.e. if it was present in the set passed to
106+
//! @ref initialize. Its absence indicates that a class involved in a
107+
//! method definition, method overrider, or method call was not
108+
//! registered. In this case, signal a @ref missing_class using
109+
//! the registry's @ref error_handler if present; then calls `abort`.
110+
//!
111+
//! @param type The type_id to hash
112+
//! @return The hash value
113+
BOOST_FORCEINLINE
114+
static auto hash(type_id type) -> std::size_t {
115+
auto index =
116+
(mult * reinterpret_cast<detail::uintptr>(type)) >> shift;
117+
118+
if constexpr (Registry::has_runtime_checks) {
119+
check(index, type);
120+
}
121+
122+
return index;
123+
}
124+
125+
//! Releases the memory allocated by `initialize`.
126+
//!
127+
//! @tparam Options... Zero or more option types, deduced from the function
128+
//! arguments.
129+
//! @param options Zero or more option objects.
130+
template<class... Options>
131+
static auto finalize(const std::tuple<Options...>&) -> void {
132+
detail::minimal_perfect_hash_control<Registry>.clear();
133+
}
134+
};
135+
};
136+
137+
template<class Registry>
138+
std::size_t minimal_perfect_hash::fn<Registry>::mult;
139+
140+
template<class Registry>
141+
std::size_t minimal_perfect_hash::fn<Registry>::shift;
142+
143+
template<class Registry>
144+
std::size_t minimal_perfect_hash::fn<Registry>::min_value;
145+
146+
template<class Registry>
147+
std::size_t minimal_perfect_hash::fn<Registry>::max_value;
148+
149+
template<class Registry>
150+
template<class InitializeContext, class... Options>
151+
void minimal_perfect_hash::fn<Registry>::initialize(
152+
const InitializeContext& ctx, std::vector<type_id>& buckets,
153+
const std::tuple<Options...>& options) {
154+
(void)options;
155+
156+
const auto N = std::distance(ctx.classes_begin(), ctx.classes_end());
157+
158+
if constexpr (mp11::mp_contains<mp11::mp_list<Options...>, trace>::value) {
159+
Registry::output::os << "Finding minimal perfect hash factors for " << N << " types\n";
160+
}
161+
162+
// For minimal perfect hash, we need exactly N buckets
163+
std::size_t hash_size = N;
164+
165+
if (hash_size == 0) {
166+
min_value = 0;
167+
max_value = 0;
168+
shift = 0;
169+
mult = 1;
170+
return;
171+
}
172+
173+
std::default_random_engine rnd(13081963);
174+
std::size_t total_attempts = 0;
175+
176+
// Calculate M (number of bits needed to represent hash_size)
177+
std::size_t M = 0;
178+
for (auto size = hash_size; size > 0; size >>= 1) {
179+
++M;
180+
}
181+
if (M > 0) {
182+
M--;
183+
}
184+
185+
std::uniform_int_distribution<std::size_t> uniform_dist;
186+
187+
// Try increasing values of M for better distribution
188+
for (std::size_t pass = 0; pass < 4; ++pass, ++M) {
189+
shift = 8 * sizeof(type_id) - M;
190+
min_value = (std::numeric_limits<std::size_t>::max)();
191+
max_value = (std::numeric_limits<std::size_t>::min)();
192+
193+
if constexpr (InitializeContext::template has_option<trace>) {
194+
ctx.tr << " trying with M = " << M << ", " << hash_size
195+
<< " buckets (minimal)\n";
196+
}
197+
198+
std::size_t attempts = 0;
199+
buckets.resize(hash_size);
200+
201+
while (attempts < 100000) {
202+
std::fill(
203+
buckets.begin(), buckets.end(), type_id(detail::uintptr_max));
204+
++attempts;
205+
++total_attempts;
206+
mult = uniform_dist(rnd) | 1;
207+
208+
bool collision_found = false;
209+
for (auto iter = ctx.classes_begin(); iter != ctx.classes_end();
210+
++iter) {
211+
for (auto type_iter = iter->type_id_begin();
212+
type_iter != iter->type_id_end(); ++type_iter) {
213+
auto type = *type_iter;
214+
auto index = (detail::uintptr(type) * mult) >> shift;
215+
216+
// For minimal perfect hash, index must be in [0, N)
217+
if (index >= hash_size) {
218+
collision_found = true;
219+
goto collision;
220+
}
221+
222+
min_value = (std::min)(min_value, index);
223+
max_value = (std::max)(max_value, index);
224+
225+
if (detail::uintptr(buckets[index]) !=
226+
detail::uintptr_max) {
227+
collision_found = true;
228+
goto collision;
229+
}
230+
231+
buckets[index] = type;
232+
}
233+
}
234+
235+
// Verify that we have a minimal perfect hash (all buckets used)
236+
for (std::size_t i = 0; i < hash_size; ++i) {
237+
if (detail::uintptr(buckets[i]) == detail::uintptr_max) {
238+
collision_found = true;
239+
goto collision;
240+
}
241+
}
242+
243+
if constexpr (InitializeContext::template has_option<trace>) {
244+
ctx.tr << " found " << mult << " after " << total_attempts
245+
<< " attempts; span = [" << min_value << ", "
246+
<< max_value << "], size = " << (max_value - min_value + 1) << "\n";
247+
}
248+
249+
return;
250+
251+
collision: {}
252+
}
253+
}
254+
255+
search_error error;
256+
error.attempts = total_attempts;
257+
error.buckets = hash_size;
258+
259+
if constexpr (Registry::has_error_handler) {
260+
Registry::error_handler::error(error);
261+
}
262+
263+
abort();
264+
}
265+
266+
template<class Registry>
267+
void minimal_perfect_hash::fn<Registry>::check(std::size_t index, type_id type) {
268+
if (index < min_value || index > max_value ||
269+
detail::minimal_perfect_hash_control<Registry>[index] != type) {
270+
271+
if constexpr (Registry::has_error_handler) {
272+
missing_class error;
273+
error.type = type;
274+
Registry::error_handler::error(error);
275+
}
276+
277+
abort();
278+
}
279+
}
280+
281+
template<class Registry, class Stream>
282+
auto minimal_perfect_hash::search_error::write(Stream& os) const -> void {
283+
os << "could not find minimal perfect hash factors after " << attempts
284+
<< " attempts using " << buckets << " buckets\n";
285+
}
286+
287+
} // namespace policies
} // namespace boost::openmethod

// Restore the warning state saved by the `#pragma warning(push)` at the top
// of this header, so that disabling C4702 does not leak into including files.
#ifdef _MSC_VER
#pragma warning(pop)
#endif

#endif

0 commit comments

Comments
 (0)