Skip to content

Commit e0d153e

Browse files
authored
Enum search for when hashing fails (#2263)
1 parent fc169a5 commit e0d153e

File tree

5 files changed

+660
-154
lines changed

5 files changed

+660
-154
lines changed

include/glaze/core/reflect.hpp

Lines changed: 165 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -635,7 +635,9 @@ namespace glz
635635
power_of_two, // Powers of 2 (flags): countr_zero(value)
636636
small_range, // Sparse lookup table for small ranges
637637
modular, // Perfect hash: (value * seed) % table_size
638-
modular_shifted // Perfect hash with shift: ((value >> shift) * seed) % table_size
638+
modular_shifted, // Perfect hash with shift: ((value >> shift) * seed) % table_size
639+
linear_search, // Fallback: linear scan through values (N <= 16)
640+
binary_search // Fallback: binary search through sorted values (N > 16)
639641
};
640642

641643
template <size_t N, size_t TableSize>
@@ -852,20 +854,29 @@ namespace glz
852854
return std::pair{false, uint64_t{0}};
853855
}();
854856

855-
static_assert(shifted_info.first, "Failed to find perfect hash seed for enum");
857+
if constexpr (shifted_info.first) {
858+
int_keys_info_t<N, table_size> info{.type = int_hash_type::modular_shifted,
859+
.seed = shifted_info.second,
860+
.table_size = table_size,
861+
.shift = common_shift};
862+
info.table.fill(static_cast<uint8_t>(N));
856863

857-
int_keys_info_t<N, table_size> info{.type = int_hash_type::modular_shifted,
858-
.seed = shifted_info.second,
859-
.table_size = table_size,
860-
.shift = common_shift};
861-
info.table.fill(static_cast<uint8_t>(N));
862-
863-
for (size_t i = 0; i < N; ++i) {
864-
const auto shifted = static_cast<uint64_t>(vals[i]) >> common_shift;
865-
const auto h = (shifted * info.seed) % table_size;
866-
info.table[h] = static_cast<uint8_t>(i);
864+
for (size_t i = 0; i < N; ++i) {
865+
const auto shifted = static_cast<uint64_t>(vals[i]) >> common_shift;
866+
const auto h = (shifted * info.seed) % table_size;
867+
info.table[h] = static_cast<uint8_t>(i);
868+
}
869+
return info;
870+
}
871+
else {
872+
// Fallback: linear search for small N, binary search for larger N
873+
if constexpr (N <= 16) {
874+
return int_keys_info_t<N, 0>{.type = int_hash_type::linear_search};
875+
}
876+
else {
877+
return int_keys_info_t<N, 0>{.type = int_hash_type::binary_search};
878+
}
867879
}
868-
return info;
869880
}
870881
}
871882
}
@@ -924,11 +935,73 @@ namespace glz
924935
const auto h = (static_cast<uint64_t>(value) * Info.seed) % Info.table_size;
925936
return Info.table[h]; // Returns N if slot is empty
926937
}
927-
else { // modular_shifted
938+
else if constexpr (Info.type == modular_shifted) {
928939
const auto shifted = static_cast<uint64_t>(value) >> Info.shift;
929940
const auto h = (shifted * Info.seed) % Info.table_size;
930941
return Info.table[h]; // Returns N if slot is empty
931942
}
943+
else if constexpr (Info.type == linear_search) {
944+
// Linear scan through enum values
945+
constexpr auto& values = enum_values_array<T>;
946+
for (size_t i = 0; i < N; ++i) {
947+
if (values[i] == value) {
948+
return i;
949+
}
950+
}
951+
return N; // Not found
952+
}
953+
else { // binary_search
954+
// Binary search through sorted enum values
955+
// Compute sorted indices and values together to avoid capture issues
956+
constexpr auto sorted_data = []() {
957+
struct result_t {
958+
std::array<size_t, N> indices{};
959+
std::array<U, N> values{};
960+
};
961+
result_t result{};
962+
963+
// Initialize indices
964+
for (size_t i = 0; i < N; ++i) {
965+
result.indices[i] = i;
966+
}
967+
968+
// Sort indices by their corresponding values (simple O(N^2) exchange sort, usable in constexpr)
969+
constexpr auto& src_values = enum_values_array<T>;
970+
for (size_t i = 0; i < N - 1; ++i) {
971+
for (size_t j = i + 1; j < N; ++j) {
972+
if (src_values[result.indices[j]] < src_values[result.indices[i]]) {
973+
auto tmp = result.indices[i];
974+
result.indices[i] = result.indices[j];
975+
result.indices[j] = tmp;
976+
}
977+
}
978+
}
979+
980+
// Build sorted values array
981+
for (size_t i = 0; i < N; ++i) {
982+
result.values[i] = src_values[result.indices[i]];
983+
}
984+
985+
return result;
986+
}();
987+
988+
// Binary search
989+
size_t left = 0;
990+
size_t right = N;
991+
while (left < right) {
992+
const size_t mid = left + (right - left) / 2;
993+
if (sorted_data.values[mid] < value) {
994+
left = mid + 1;
995+
}
996+
else {
997+
right = mid;
998+
}
999+
}
1000+
if (left < N && sorted_data.values[left] == value) {
1001+
return sorted_data.indices[left];
1002+
}
1003+
return N; // Not found
1004+
}
9321005
}
9331006
};
9341007

@@ -2204,8 +2277,9 @@ namespace glz
22042277

22052278
GLZ_ALWAYS_INLINE static constexpr size_t op(auto&& it, auto end) noexcept
22062279
{
2207-
// For JSON we require at a minimum ":1} characters after a key (1 being a single char number)
2208-
// This means that we can require all these characters to exist for SWAR parsing
2280+
// Bounds checks ensure we can safely read the string content and determine its length.
2281+
// Note: This is used for both object keys and enum values, so we cannot assume
2282+
// extra characters exist after the closing quote (e.g., standalone enum: "value")
22092283

22102284
if constexpr (length_range == 0) {
22112285
if ((it + min_length) >= end) [[unlikely]] {
@@ -2217,7 +2291,9 @@ namespace glz
22172291
else {
22182292
if constexpr (length_range == 1) {
22192293
auto quote = it + min_length;
2220-
if ((quote + 1) >= end) [[unlikely]] {
2294+
// Ensure we can read *quote to determine if string is min_length or max_length.
2295+
// Returning early when (quote + 1) > end guarantees quote < end afterwards, making *quote dereferenceable.
2296+
if ((quote + 1) > end) [[unlikely]] {
22212297
return N;
22222298
}
22232299

@@ -2673,20 +2749,29 @@ namespace glz
26732749
return std::pair{false, uint64_t{0}};
26742750
}();
26752751

2676-
static_assert(shifted_info.first, "Failed to find perfect hash seed for variant int IDs");
2752+
if constexpr (shifted_info.first) {
2753+
int_keys_info_t<N, table_size> info{.type = int_hash_type::modular_shifted,
2754+
.seed = shifted_info.second,
2755+
.table_size = table_size,
2756+
.shift = common_shift};
2757+
info.table.fill(static_cast<uint8_t>(N));
26772758

2678-
int_keys_info_t<N, table_size> info{.type = int_hash_type::modular_shifted,
2679-
.seed = shifted_info.second,
2680-
.table_size = table_size,
2681-
.shift = common_shift};
2682-
info.table.fill(static_cast<uint8_t>(N));
2683-
2684-
for (size_t i = 0; i < N; ++i) {
2685-
const auto shifted = static_cast<uint64_t>(vals[i]) >> common_shift;
2686-
const auto h = (shifted * info.seed) % table_size;
2687-
info.table[h] = static_cast<uint8_t>(i);
2759+
for (size_t i = 0; i < N; ++i) {
2760+
const auto shifted = static_cast<uint64_t>(vals[i]) >> common_shift;
2761+
const auto h = (shifted * info.seed) % table_size;
2762+
info.table[h] = static_cast<uint8_t>(i);
2763+
}
2764+
return info;
2765+
}
2766+
else {
2767+
// Fallback: linear search for small N, binary search for larger N
2768+
if constexpr (N <= 16) {
2769+
return int_keys_info_t<N, 0>{.type = int_hash_type::linear_search};
2770+
}
2771+
else {
2772+
return int_keys_info_t<N, 0>{.type = int_hash_type::binary_search};
2773+
}
26882774
}
2689-
return info;
26902775
}
26912776
}
26922777
}
@@ -2759,11 +2844,61 @@ namespace glz
27592844
const auto h = (static_cast<uint64_t>(id) * Info.seed) % Info.table_size;
27602845
return Info.table[h];
27612846
}
2762-
else { // modular_shifted
2847+
else if constexpr (Info.type == modular_shifted) {
27632848
const auto shifted = static_cast<uint64_t>(id) >> Info.shift;
27642849
const auto h = (shifted * Info.seed) % Info.table_size;
27652850
return Info.table[h];
27662851
}
2852+
else if constexpr (Info.type == linear_search) {
2853+
for (size_t i = 0; i < N; ++i) {
2854+
if (ids_v<T>[i] == id) {
2855+
return i;
2856+
}
2857+
}
2858+
return N;
2859+
}
2860+
else { // binary_search
2861+
constexpr auto sorted_data = []() {
2862+
struct result_t {
2863+
std::array<size_t, N> indices{};
2864+
std::array<U, N> values{};
2865+
};
2866+
result_t result{};
2867+
2868+
for (size_t i = 0; i < N; ++i) {
2869+
result.indices[i] = i;
2870+
}
2871+
for (size_t i = 0; i < N - 1; ++i) {
2872+
for (size_t j = i + 1; j < N; ++j) {
2873+
if (ids_v<T>[result.indices[j]] < ids_v<T>[result.indices[i]]) {
2874+
auto tmp = result.indices[i];
2875+
result.indices[i] = result.indices[j];
2876+
result.indices[j] = tmp;
2877+
}
2878+
}
2879+
}
2880+
for (size_t i = 0; i < N; ++i) {
2881+
result.values[i] = ids_v<T>[result.indices[i]];
2882+
}
2883+
return result;
2884+
}();
2885+
2886+
size_t left = 0;
2887+
size_t right = N;
2888+
while (left < right) {
2889+
const size_t mid = left + (right - left) / 2;
2890+
if (sorted_data.values[mid] < id) {
2891+
left = mid + 1;
2892+
}
2893+
else {
2894+
right = mid;
2895+
}
2896+
}
2897+
if (left < N && sorted_data.values[left] == id) {
2898+
return sorted_data.indices[left];
2899+
}
2900+
return N;
2901+
}
27672902
}
27682903
};
27692904

tests/json_test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,3 +157,4 @@ add_executable(lazy_test lazy_test.cpp)
157157
target_link_libraries(lazy_test PRIVATE glz_test_common)
158158

159159
add_test(NAME lazy_test COMMAND lazy_test)
160+

tests/json_test/json_test.cpp

Lines changed: 0 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -349,130 +349,6 @@ suite enum_tests = [] {
349349
};
350350
};
351351

352-
// Issue #2246: Sparse enums with huge values that share common power-of-2 factors
353-
// These enums previously failed to compile with "Failed to find perfect hash seed for enum"
354-
// because values like 0 and 400,000,000 both hash to 0 with standard modular hash.
355-
// The fix adds a modular_shifted hash type that removes common trailing zeros before hashing.
356-
357-
enum class SparseEnum : int { Zero = 0, FourHundredMillion = 400000000 };
358-
359-
template <>
360-
struct glz::meta<SparseEnum>
361-
{
362-
using enum SparseEnum;
363-
static constexpr auto value = enumerate(Zero, FourHundredMillion);
364-
};
365-
366-
// Another sparse enum with values that have common factors
367-
enum class SparseEnumMillions : int { A = 0, B = 1000000, C = 2000000 };
368-
369-
template <>
370-
struct glz::meta<SparseEnumMillions>
371-
{
372-
using enum SparseEnumMillions;
373-
static constexpr auto value = enumerate(A, B, C);
374-
};
375-
376-
// Sparse enum with power-of-2 gaps (tests that standard modular still works when possible)
377-
enum class SparseEnumPow2 : int { X = 1, Y = 1024, Z = 65536 };
378-
379-
template <>
380-
struct glz::meta<SparseEnumPow2>
381-
{
382-
using enum SparseEnumPow2;
383-
static constexpr auto value = enumerate(X, Y, Z);
384-
};
385-
386-
// Test struct for sparse enum in object test
387-
struct SparseEnumTestStruct
388-
{
389-
SparseEnum e1{SparseEnum::Zero};
390-
SparseEnum e2{SparseEnum::FourHundredMillion};
391-
};
392-
393-
suite sparse_enum_tests = [] {
394-
"sparse_enum_serialization"_test = [] {
395-
// Test issue #2246: enum values 0 and 400,000,000
396-
SparseEnum e = SparseEnum::Zero;
397-
std::string json;
398-
expect(not glz::write_json(e, json));
399-
expect(json == "\"Zero\"") << json;
400-
401-
e = SparseEnum::FourHundredMillion;
402-
json.clear();
403-
expect(not glz::write_json(e, json));
404-
expect(json == "\"FourHundredMillion\"") << json;
405-
};
406-
407-
"sparse_enum_deserialization"_test = [] {
408-
SparseEnum e;
409-
expect(not glz::read_json(e, R"("Zero")"));
410-
expect(e == SparseEnum::Zero);
411-
412-
expect(not glz::read_json(e, R"("FourHundredMillion")"));
413-
expect(e == SparseEnum::FourHundredMillion);
414-
};
415-
416-
"sparse_enum_roundtrip"_test = [] {
417-
for (auto val : {SparseEnum::Zero, SparseEnum::FourHundredMillion}) {
418-
std::string json;
419-
expect(not glz::write_json(val, json));
420-
421-
SparseEnum parsed;
422-
expect(not glz::read_json(parsed, json));
423-
expect(parsed == val);
424-
}
425-
};
426-
427-
"sparse_enum_millions_roundtrip"_test = [] {
428-
for (auto val : {SparseEnumMillions::A, SparseEnumMillions::B, SparseEnumMillions::C}) {
429-
std::string json;
430-
expect(not glz::write_json(val, json));
431-
432-
SparseEnumMillions parsed;
433-
expect(not glz::read_json(parsed, json));
434-
expect(parsed == val);
435-
}
436-
};
437-
438-
"sparse_enum_pow2_roundtrip"_test = [] {
439-
for (auto val : {SparseEnumPow2::X, SparseEnumPow2::Y, SparseEnumPow2::Z}) {
440-
std::string json;
441-
expect(not glz::write_json(val, json));
442-
443-
SparseEnumPow2 parsed;
444-
expect(not glz::read_json(parsed, json));
445-
expect(parsed == val);
446-
}
447-
};
448-
449-
"sparse_enum_get_name"_test = [] {
450-
expect(glz::get_enum_name(SparseEnum::Zero) == "Zero");
451-
expect(glz::get_enum_name(SparseEnum::FourHundredMillion) == "FourHundredMillion");
452-
expect(glz::get_enum_name(SparseEnumMillions::A) == "A");
453-
expect(glz::get_enum_name(SparseEnumMillions::B) == "B");
454-
expect(glz::get_enum_name(SparseEnumMillions::C) == "C");
455-
};
456-
457-
"sparse_enum_invalid_value"_test = [] {
458-
// Test that invalid enum values return empty string
459-
auto invalid = static_cast<SparseEnum>(12345);
460-
expect(glz::get_enum_name(invalid).empty());
461-
};
462-
463-
"sparse_enum_in_struct"_test = [] {
464-
SparseEnumTestStruct obj;
465-
std::string json;
466-
expect(not glz::write_json(obj, json));
467-
expect(json == R"({"e1":"Zero","e2":"FourHundredMillion"})") << json;
468-
469-
SparseEnumTestStruct parsed;
470-
expect(not glz::read_json(parsed, json));
471-
expect(parsed.e1 == SparseEnum::Zero);
472-
expect(parsed.e2 == SparseEnum::FourHundredMillion);
473-
};
474-
};
475-
476352
static constexpr auto MY_ARRAY_MAX = 2;
477353

478354
struct MyArrayStruct

0 commit comments

Comments
 (0)