Skip to content

Commit 7e44f68

Browse files
Copilotjll63
andcommitted
Allow up to 10% bucket waste in minimal_perfect_hash
Co-authored-by: jll63 <[email protected]>
1 parent b486642 commit 7e44f68

File tree

1 file changed

+19
-14
lines changed

1 file changed

+19
-14
lines changed

include/boost/openmethod/policies/minimal_perfect_hash.hpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ namespace policies {
4848
//! function in the form `H(x)=(M*x)>>N`. It uses the PtHash algorithm to
4949
//! determine values for `M` and `N` that result in a minimal perfect hash
5050
//! function for the set of registered type_ids. This means that the hash
51-
//! function is collision-free and the codomain is exactly the size of the
52-
//! domain, resulting in a dense range [0, n-1] for n inputs.
51+
//! function is collision-free and the codomain is approximately the size of
52+
//! the domain: hash values fall in [0, m-1], where m is about 1.1*n for n inputs.
5353
//!
5454
//! Unlike @ref fast_perfect_hash, which uses a hash table of size 2^k
5555
//! (typically larger than needed) and may have unused slots, this policy
56-
//! ensures the hash table has exactly n slots for n type_ids, with all
57-
//! slots filled. This minimizes memory usage but may require more search
58-
//! attempts during initialization.
56+
//! uses approximately 1.1*n slots for n type_ids (allowing up to 10% waste).
57+
//! This keeps memory usage close to minimal while maintaining good search performance
58+
//! during initialization.
5959
struct minimal_perfect_hash : type_hash {
6060

6161
//! Cannot find hash factors
@@ -193,8 +193,11 @@ void minimal_perfect_hash::fn<Registry>::initialize(
193193
ctx.tr << "Finding minimal perfect hash using PtHash for " << N << " types\n";
194194
}
195195

196-
// Table size is exactly N for minimal perfect hash
197-
table_size = N;
196+
// Table size is about N * 1.1 (N + N / 10, and at least N + 1 when N > 0); the spare slots make finding a hash easier
197+
table_size = N + N / 10;
198+
if (table_size == N && N > 0) {
199+
table_size = N + 1; // Ensure at least 1 extra slot for N > 0
200+
}
198201

199202
if (table_size == 0) {
200203
shift = 0;
@@ -241,6 +244,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
241244
constexpr std::size_t DEFAULT_GROUP_DIVISOR = 4; // N/4 groups for balance between memory and speed
242245
constexpr std::size_t DISTRIBUTION_FACTOR = 2; // 2*N range for better distribution
243246
constexpr std::size_t bits_per_type_id = 8 * sizeof(type_id);
247+
// Note: table_size includes roughly 10% spare slots, which makes finding a hash function easier while still being memory-efficient
244248

245249
std::default_random_engine rnd(DEFAULT_RANDOM_SEED);
246250
std::uniform_int_distribution<std::size_t> uniform_dist;
@@ -343,19 +347,20 @@ void minimal_perfect_hash::fn<Registry>::initialize(
343347
}
344348

345349
if (success) {
346-
// Verify all positions are used (minimal property)
347-
bool all_used = true;
350+
// Count how many positions are used
351+
std::size_t used_count = 0;
348352
for (std::size_t i = 0; i < table_size; ++i) {
349-
if (detail::uintptr(buckets[i]) == detail::uintptr_max) {
350-
all_used = false;
351-
break;
353+
if (detail::uintptr(buckets[i]) != detail::uintptr_max) {
354+
used_count++;
352355
}
353356
}
354357

355-
if (all_used) {
358+
// Accept if we've placed all keys (allow up to 10% waste)
359+
if (used_count == keys.size()) {
356360
if constexpr (InitializeContext::template has_option<trace>) {
357361
ctx.tr << " Found minimal perfect hash after " << total_attempts
358-
<< " attempts\n";
362+
<< " attempts; " << used_count << "/" << table_size
363+
<< " slots used\n";
359364
}
360365
return;
361366
}

0 commit comments

Comments
 (0)