|
| 1 | +// Copyright (c) 2018-2025 Jean-Louis Leroy |
| 2 | +// Distributed under the Boost Software License, Version 1.0. |
| 3 | +// See accompanying file LICENSE_1_0.txt |
| 4 | +// or copy at http://www.boost.org/LICENSE_1_0.txt) |
| 5 | + |
| 6 | +#ifndef BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP |
| 7 | +#define BOOST_OPENMETHOD_POLICY_MINIMAL_PERFECT_HASH_HPP |
| 8 | + |
| 9 | +#include <boost/openmethod/preamble.hpp> |
| 10 | + |
| 11 | +#include <limits> |
| 12 | +#include <random> |
| 13 | +#ifdef _MSC_VER |
| 14 | +#pragma warning(push) |
| 15 | +#pragma warning(disable : 4702) // unreachable code |
| 16 | +#endif |
| 17 | + |
| 18 | +namespace boost::openmethod { |
| 19 | + |
| 20 | +namespace detail { |
| 21 | + |
| 22 | +template<class Registry> |
| 23 | +std::vector<type_id> minimal_perfect_hash_control; |
| 24 | + |
| 25 | +} // namespace detail |
| 26 | + |
| 27 | +namespace policies { |
| 28 | + |
| 29 | +//! Hash type ids using a minimal perfect hash function. |
| 30 | +//! |
| 31 | +//! `minimal_perfect_hash` implements the @ref type_hash policy using a hash |
| 32 | +//! function in the form `H(x)=(M*x)>>N`. It uses the PtHash algorithm to |
| 33 | +//! determine values for `M` and `N` that result in a minimal perfect hash |
| 34 | +//! function for the set of registered type_ids. This means that the hash |
| 35 | +//! function is collision-free and the codomain is exactly the size of the |
| 36 | +//! domain, resulting in a dense range [0, n-1] for n inputs. |
| 37 | +struct minimal_perfect_hash : type_hash { |
| 38 | + |
| 39 | + //! Cannot find hash factors |
| 40 | + struct search_error : openmethod_error { |
| 41 | + //! Number of attempts to find hash factors |
| 42 | + std::size_t attempts; |
| 43 | + //! Number of buckets used in the last attempt |
| 44 | + std::size_t buckets; |
| 45 | + |
| 46 | + //! Write a short description to an output stream |
| 47 | + //! @param os The output stream |
| 48 | + //! @tparam Registry The registry |
| 49 | + //! @tparam Stream A @ref LightweightOutputStream |
| 50 | + template<class Registry, class Stream> |
| 51 | + auto write(Stream& os) const -> void; |
| 52 | + }; |
| 53 | + |
| 54 | + using errors = std::variant<search_error>; |
| 55 | + |
| 56 | + //! A TypeHashFn metafunction. |
| 57 | + //! |
| 58 | + //! @tparam Registry The registry containing this policy |
| 59 | + template<class Registry> |
| 60 | + class fn { |
| 61 | + static std::size_t mult; |
| 62 | + static std::size_t shift; |
| 63 | + static std::size_t min_value; |
| 64 | + static std::size_t max_value; |
| 65 | + |
| 66 | + static void check(std::size_t index, type_id type); |
| 67 | + |
| 68 | + template<class InitializeContext, class... Options> |
| 69 | + static void initialize( |
| 70 | + const InitializeContext& ctx, std::vector<type_id>& buckets, |
| 71 | + const std::tuple<Options...>& options); |
| 72 | + |
| 73 | + public: |
| 74 | + //! Find the hash factors |
| 75 | + //! |
| 76 | + //! Attempts to find suitable values for the multiplication factor `M` |
| 77 | + //! and the shift amount `N` that result in a minimal perfect hash |
| 78 | + //! function for the specified input values. |
| 79 | + //! |
| 80 | + //! If no suitable values are found, calls the error handler with |
| 81 | + //! a @ref hash_error object then calls `abort`. |
| 82 | + //! |
| 83 | + //! @tparam Context An @ref InitializeContext. |
| 84 | + //! @param ctx A Context object. |
| 85 | + //! @return A pair containing the minimum and maximum hash values. |
| 86 | + template<class Context, class... Options> |
| 87 | + static auto |
| 88 | + initialize(const Context& ctx, const std::tuple<Options...>& options) { |
| 89 | + if constexpr (Registry::has_runtime_checks) { |
| 90 | + initialize( |
| 91 | + ctx, detail::minimal_perfect_hash_control<Registry>, options); |
| 92 | + } else { |
| 93 | + std::vector<type_id> buckets; |
| 94 | + initialize(ctx, buckets, options); |
| 95 | + } |
| 96 | + |
| 97 | + return std::pair{min_value, max_value}; |
| 98 | + } |
| 99 | + |
| 100 | + //! Hash a type id |
| 101 | + //! |
| 102 | + //! Hash a type id. |
| 103 | + //! |
| 104 | + //! If `Registry` contains the @ref runtime_checks policy, checks that |
| 105 | + //! the type id is valid, i.e. if it was present in the set passed to |
| 106 | + //! @ref initialize. Its absence indicates that a class involved in a |
| 107 | + //! method definition, method overrider, or method call was not |
| 108 | + //! registered. In this case, signal a @ref missing_class using |
| 109 | + //! the registry's @ref error_handler if present; then calls `abort`. |
| 110 | + //! |
| 111 | + //! @param type The type_id to hash |
| 112 | + //! @return The hash value |
| 113 | + BOOST_FORCEINLINE |
| 114 | + static auto hash(type_id type) -> std::size_t { |
| 115 | + auto index = |
| 116 | + (mult * reinterpret_cast<detail::uintptr>(type)) >> shift; |
| 117 | + |
| 118 | + if constexpr (Registry::has_runtime_checks) { |
| 119 | + check(index, type); |
| 120 | + } |
| 121 | + |
| 122 | + return index; |
| 123 | + } |
| 124 | + |
| 125 | + //! Releases the memory allocated by `initialize`. |
| 126 | + //! |
| 127 | + //! @tparam Options... Zero or more option types, deduced from the function |
| 128 | + //! arguments. |
| 129 | + //! @param options Zero or more option objects. |
| 130 | + template<class... Options> |
| 131 | + static auto finalize(const std::tuple<Options...>&) -> void { |
| 132 | + detail::minimal_perfect_hash_control<Registry>.clear(); |
| 133 | + } |
| 134 | + }; |
| 135 | +}; |
| 136 | + |
| 137 | +template<class Registry> |
| 138 | +std::size_t minimal_perfect_hash::fn<Registry>::mult; |
| 139 | + |
| 140 | +template<class Registry> |
| 141 | +std::size_t minimal_perfect_hash::fn<Registry>::shift; |
| 142 | + |
| 143 | +template<class Registry> |
| 144 | +std::size_t minimal_perfect_hash::fn<Registry>::min_value; |
| 145 | + |
| 146 | +template<class Registry> |
| 147 | +std::size_t minimal_perfect_hash::fn<Registry>::max_value; |
| 148 | + |
| 149 | +template<class Registry> |
| 150 | +template<class InitializeContext, class... Options> |
| 151 | +void minimal_perfect_hash::fn<Registry>::initialize( |
| 152 | + const InitializeContext& ctx, std::vector<type_id>& buckets, |
| 153 | + const std::tuple<Options...>& options) { |
| 154 | + (void)options; |
| 155 | + |
| 156 | + const auto N = std::distance(ctx.classes_begin(), ctx.classes_end()); |
| 157 | + |
| 158 | + if constexpr (mp11::mp_contains<mp11::mp_list<Options...>, trace>::value) { |
| 159 | + Registry::output::os << "Finding minimal perfect hash factors for " << N << " types\n"; |
| 160 | + } |
| 161 | + |
| 162 | + // For minimal perfect hash, we need exactly N buckets |
| 163 | + std::size_t hash_size = N; |
| 164 | + |
| 165 | + if (hash_size == 0) { |
| 166 | + min_value = 0; |
| 167 | + max_value = 0; |
| 168 | + shift = 0; |
| 169 | + mult = 1; |
| 170 | + return; |
| 171 | + } |
| 172 | + |
| 173 | + std::default_random_engine rnd(13081963); |
| 174 | + std::size_t total_attempts = 0; |
| 175 | + |
| 176 | + // Calculate M (number of bits needed to represent hash_size) |
| 177 | + std::size_t M = 0; |
| 178 | + for (auto size = hash_size; size > 0; size >>= 1) { |
| 179 | + ++M; |
| 180 | + } |
| 181 | + if (M > 0) { |
| 182 | + M--; |
| 183 | + } |
| 184 | + |
| 185 | + std::uniform_int_distribution<std::size_t> uniform_dist; |
| 186 | + |
| 187 | + // Try increasing values of M for better distribution |
| 188 | + for (std::size_t pass = 0; pass < 4; ++pass, ++M) { |
| 189 | + shift = 8 * sizeof(type_id) - M; |
| 190 | + min_value = (std::numeric_limits<std::size_t>::max)(); |
| 191 | + max_value = (std::numeric_limits<std::size_t>::min)(); |
| 192 | + |
| 193 | + if constexpr (InitializeContext::template has_option<trace>) { |
| 194 | + ctx.tr << " trying with M = " << M << ", " << hash_size |
| 195 | + << " buckets (minimal)\n"; |
| 196 | + } |
| 197 | + |
| 198 | + std::size_t attempts = 0; |
| 199 | + buckets.resize(hash_size); |
| 200 | + |
| 201 | + while (attempts < 100000) { |
| 202 | + std::fill( |
| 203 | + buckets.begin(), buckets.end(), type_id(detail::uintptr_max)); |
| 204 | + ++attempts; |
| 205 | + ++total_attempts; |
| 206 | + mult = uniform_dist(rnd) | 1; |
| 207 | + |
| 208 | + bool collision_found = false; |
| 209 | + for (auto iter = ctx.classes_begin(); iter != ctx.classes_end(); |
| 210 | + ++iter) { |
| 211 | + for (auto type_iter = iter->type_id_begin(); |
| 212 | + type_iter != iter->type_id_end(); ++type_iter) { |
| 213 | + auto type = *type_iter; |
| 214 | + auto index = (detail::uintptr(type) * mult) >> shift; |
| 215 | + |
| 216 | + // For minimal perfect hash, index must be in [0, N) |
| 217 | + if (index >= hash_size) { |
| 218 | + collision_found = true; |
| 219 | + goto collision; |
| 220 | + } |
| 221 | + |
| 222 | + min_value = (std::min)(min_value, index); |
| 223 | + max_value = (std::max)(max_value, index); |
| 224 | + |
| 225 | + if (detail::uintptr(buckets[index]) != |
| 226 | + detail::uintptr_max) { |
| 227 | + collision_found = true; |
| 228 | + goto collision; |
| 229 | + } |
| 230 | + |
| 231 | + buckets[index] = type; |
| 232 | + } |
| 233 | + } |
| 234 | + |
| 235 | + // Verify that we have a minimal perfect hash (all buckets used) |
| 236 | + for (std::size_t i = 0; i < hash_size; ++i) { |
| 237 | + if (detail::uintptr(buckets[i]) == detail::uintptr_max) { |
| 238 | + collision_found = true; |
| 239 | + goto collision; |
| 240 | + } |
| 241 | + } |
| 242 | + |
| 243 | + if constexpr (InitializeContext::template has_option<trace>) { |
| 244 | + ctx.tr << " found " << mult << " after " << total_attempts |
| 245 | + << " attempts; span = [" << min_value << ", " |
| 246 | + << max_value << "], size = " << (max_value - min_value + 1) << "\n"; |
| 247 | + } |
| 248 | + |
| 249 | + return; |
| 250 | + |
| 251 | + collision: {} |
| 252 | + } |
| 253 | + } |
| 254 | + |
| 255 | + search_error error; |
| 256 | + error.attempts = total_attempts; |
| 257 | + error.buckets = hash_size; |
| 258 | + |
| 259 | + if constexpr (Registry::has_error_handler) { |
| 260 | + Registry::error_handler::error(error); |
| 261 | + } |
| 262 | + |
| 263 | + abort(); |
| 264 | +} |
| 265 | + |
| 266 | +template<class Registry> |
| 267 | +void minimal_perfect_hash::fn<Registry>::check(std::size_t index, type_id type) { |
| 268 | + if (index < min_value || index > max_value || |
| 269 | + detail::minimal_perfect_hash_control<Registry>[index] != type) { |
| 270 | + |
| 271 | + if constexpr (Registry::has_error_handler) { |
| 272 | + missing_class error; |
| 273 | + error.type = type; |
| 274 | + Registry::error_handler::error(error); |
| 275 | + } |
| 276 | + |
| 277 | + abort(); |
| 278 | + } |
| 279 | +} |
| 280 | + |
| 281 | +template<class Registry, class Stream> |
| 282 | +auto minimal_perfect_hash::search_error::write(Stream& os) const -> void { |
| 283 | + os << "could not find minimal perfect hash factors after " << attempts |
| 284 | + << " attempts using " << buckets << " buckets\n"; |
| 285 | +} |
| 286 | + |
| 287 | +} // namespace policies |
| 288 | +} // namespace boost::openmethod |
| 289 | + |
| 290 | +#endif |
0 commit comments