@@ -48,14 +48,14 @@ namespace policies {
4848// ! function in the form `H(x)=(M*x)>>N`. It uses the PtHash algorithm to
4949// ! determine values for `M` and `N` that result in a minimal perfect hash
5050// ! function for the set of registered type_ids. This means that the hash
51- // ! function is collision-free and the codomain is exactly the size of the
52- // ! domain, resulting in a dense range [0, n-1] for n inputs.
51+ // ! function is collision-free and the codomain is only slightly larger than
52+ // ! the domain: n inputs map into a nearly dense range [0, m-1], m ~ 1.1 * n.
5353// !
5454// ! Unlike @ref fast_perfect_hash, which uses a hash table of size 2^k
5555// ! (typically larger than needed) and may have unused slots, this policy
56- // ! ensures the hash table has exactly n slots for n type_ids, with all
57- // ! slots filled. This minimizes memory usage but may require more search
58- // ! attempts during initialization.
56+ // ! uses about 1.1 * n slots (and at least n + 1) for n type_ids, so roughly
57+ // ! 10% of the slots stay unused. This keeps memory usage close to minimal
58+ // ! while making the search for hash factors during initialization easier.
5959struct minimal_perfect_hash : type_hash {
6060
6161 // ! Cannot find hash factors
@@ -193,8 +193,11 @@ void minimal_perfect_hash::fn<Registry>::initialize(
193193 ctx.tr << " Finding minimal perfect hash using PtHash for " << N << " types\n " ;
194194 }
195195
196- // Table size is exactly N for minimal perfect hash
197- table_size = N;
196+ // Table size is N * 1.1 to allow up to 10% waste (makes finding hash easier)
197+ table_size = N + N / 10 ;
198+ if (table_size == N && N > 0 ) {
199+ table_size = N + 1 ; // Ensure at least 1 extra slot for N > 0
200+ }
198201
199202 if (table_size == 0 ) {
200203 shift = 0 ;
@@ -241,6 +244,7 @@ void minimal_perfect_hash::fn<Registry>::initialize(
241244 constexpr std::size_t DEFAULT_GROUP_DIVISOR = 4 ; // N/4 groups for balance between memory and speed
242245 constexpr std::size_t DISTRIBUTION_FACTOR = 2 ; // 2*N range for better distribution
243246 constexpr std::size_t bits_per_type_id = 8 * sizeof (type_id);
247+ // Note: table_size (computed earlier) already includes ~10% slack, which makes the factor search below easier
244248
245249 std::default_random_engine rnd (DEFAULT_RANDOM_SEED);
246250 std::uniform_int_distribution<std::size_t > uniform_dist;
@@ -343,19 +347,20 @@ void minimal_perfect_hash::fn<Registry>::initialize(
343347 }
344348
345349 if (success) {
346- // Verify all positions are used (minimal property)
347- bool all_used = true ;
350+ // Count how many positions are used
351+ std:: size_t used_count = 0 ;
348352 for (std::size_t i = 0 ; i < table_size; ++i) {
349- if (detail::uintptr (buckets[i]) == detail::uintptr_max) {
350- all_used = false ;
351- break ;
353+ if (detail::uintptr (buckets[i]) != detail::uintptr_max) {
354+ used_count++;
352355 }
353356 }
354357
355- if (all_used) {
358+ // Accept if we've placed all keys (allow up to 10% waste)
359+ if (used_count == keys.size ()) {
356360 if constexpr (InitializeContext::template has_option<trace>) {
357361 ctx.tr << " Found minimal perfect hash after " << total_attempts
358- << " attempts\n " ;
362+ << " attempts; " << used_count << " /" << table_size
363+ << " slots used\n " ;
359364 }
360365 return ;
361366 }
0 commit comments