diff --git a/CMakePresets.json b/CMakePresets.json index b91a0cd74..037914b50 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -51,7 +51,7 @@ "cacheVariables" : { "CMAKE_COMPILE_WARNING_AS_ERROR": "ON", "CMAKE_C_FLAGS": "-Wall -Wpedantic -Wextra -Wno-error=unused-variable -Wno-error=newline-eof -Wno-unused-parameter", - "CMAKE_CXX_FLAGS": "-Wall -Wpedantic -Wno-c++17-attribute-extensions -Wno-unused-parameter" + "CMAKE_CXX_FLAGS": "-Wall -Wpedantic -Wno-c++17-attribute-extensions -Wno-unused-parameter -Wno-error=self-assign" } }, { diff --git a/src/clib/CMakeLists.txt b/src/clib/CMakeLists.txt index 693d6ea37..7d43b9997 100644 --- a/src/clib/CMakeLists.txt +++ b/src/clib/CMakeLists.txt @@ -126,6 +126,9 @@ add_library(Grackle_Grackle solve_rate_cool_g-cpp.cpp solve_rate_cool_g-cpp.h step_rate_gauss_seidel.hpp step_rate_newton_raphson.hpp + support/FrozenKeyIdxBiMap.hpp + support/FrozenKeyIdxBiMap_detail.hpp + support/fnv1a_hash.hpp time_deriv_0d.hpp utils-cpp.cpp utils-cpp.hpp utils-field.hpp diff --git a/src/clib/dust/grain_species_info.cpp b/src/clib/dust/grain_species_info.cpp index 3a6a76f80..38a9d59cc 100644 --- a/src/clib/dust/grain_species_info.cpp +++ b/src/clib/dust/grain_species_info.cpp @@ -15,6 +15,7 @@ #include "LUT.hpp" #include "grain_species_info.hpp" +#include "../support/FrozenKeyIdxBiMap.hpp" // The following logic effectively does 2 (related things): // 1. it serves as a human-readable registry of all known grain species and @@ -51,9 +52,8 @@ namespace { // stuff inside an anonymous namespace is local to this file /// - the ingredient list in the returned instance is **NOT** terminated by /// the sentinel grackle::impl::GrainSpeciesInfoEntry mk_gsp_info_entry_helper_( - int species_idx, int onlygrainsp_idx, const char* name, - bool h2dust_uses_carbonaceous_table, double sublimation_temperature, - double bulk_density_cgs, + int species_idx, bool h2dust_uses_carbonaceous_table, + double sublimation_temperature, double bulk_density_cgs, const grackle::impl::GrainGrowthIngredient* growth_ingredients) { using grackle::impl::GrainGrowthIngredient; @@ -76,8 +76,6 @@ grackle::impl::GrainSpeciesInfoEntry mk_gsp_info_entry_helper_( } return grackle::impl::GrainSpeciesInfoEntry{species_idx, - onlygrainsp_idx, - name, h2dust_uses_carbonaceous_table, sublimation_temperature, bulk_density_cgs, @@ -85,18 +83,23 @@ grackle::impl::GrainSpeciesInfoEntry mk_gsp_info_entry_helper_( out_ingredient_ptr}; } +// ugh, I don't like this... +grackle::impl::GrainSpeciesInfo mk_invalid_GrainSpeciesInfo() { + return {-1, nullptr, grackle::impl::mk_invalid_FrozenKeyIdxBiMap()}; +} + } // anonymous namespace grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( int dust_species_parameter) { - GrainSpeciesInfo out{-1, nullptr}; // indicates an error - out.n_species = get_n_grain_species(dust_species_parameter); - - if (out.n_species <= 0) { - return out; + int n_species = get_n_grain_species(dust_species_parameter); + if (n_species <= 0) { + return mk_invalid_GrainSpeciesInfo(); } - out.species_info = new GrainSpeciesInfoEntry[out.n_species]; + // names is allocated with the max number of known grain species + const char* names[OnlyGrainSpLUT::NUM_ENTRIES]; + GrainSpeciesInfoEntry* species_info = new GrainSpeciesInfoEntry[n_species]; // At the time of writing: // - we **only** use h2rate_carbonaceous_coef_table for the AC_dust @@ -126,10 +129,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( {1, SpLUT::SiOI, 44.}, {2, SpLUT::H2O, 18.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[0] = mk_gsp_info_entry_helper_( + names[0] = "MgSiO3_dust"; + species_info[0] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::MgSiO3_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::MgSiO3_dust, - /* name = */ "MgSiO3_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1222.0, /* bulk_density_cgs = */ 3.20185, @@ -140,10 +142,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( // {coef, species_idx, particle mass} {1, SpLUT::CI, 12.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[1] = mk_gsp_info_entry_helper_( + names[1] = "AC_dust"; + species_info[1] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::AC_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::AC_dust, - /* name = */ "AC_dust", /* h2dust_uses_carbonaceous_table = */ true, /* sublimation_temperature = */ 1800.0, /* bulk_density_cgs = */ 2.27949, @@ -156,10 +157,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( // {coef, species_idx, particle mass} {1, SpLUT::SiI, 28.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[2] = mk_gsp_info_entry_helper_( + names[2] = "SiM_dust"; + species_info[2] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::SiM_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::SiM_dust, - /* name = */ "SiM_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1500.0, /* bulk_density_cgs = */ 2.34118, @@ -170,10 +170,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( // {coef, species_idx, particle mass} {1, SpLUT::Fe, 56.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[3] = mk_gsp_info_entry_helper_( + names[3] = "FeM_dust"; + species_info[3] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::FeM_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::FeM_dust, - /* name = */ "FeM_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1500.0, /* bulk_density_cgs = */ 7.95995, @@ -186,10 +185,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( {1, SpLUT::SiOI, 44.}, {3, SpLUT::H2O, 18.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[4] = mk_gsp_info_entry_helper_( + names[4] = "Mg2SiO4_dust"; + species_info[4] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::Mg2SiO4_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::Mg2SiO4_dust, - /* name = */ "Mg2SiO4_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1277.0, /* bulk_density_cgs = */ 3.22133, @@ -201,10 +199,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( {3, SpLUT::Fe, 56.}, {4, SpLUT::H2O, 18.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[5] = mk_gsp_info_entry_helper_( + names[5] = "Fe3O4_dust"; + species_info[5] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::Fe3O4_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::Fe3O4_dust, - /* name = */ "Fe3O4_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1500.0, /* bulk_density_cgs = */ 5.25096, @@ -215,10 +212,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( // {coef, species_idx, particle mass} {1, SpLUT::SiO2I, 60.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[6] = mk_gsp_info_entry_helper_( + names[6] = "SiO2_dust"; + species_info[6] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::SiO2_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::SiO2_dust, - /* name = */ "SiO2_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1500.0, /* bulk_density_cgs = */ 2.66235, @@ -230,10 +226,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( {1, SpLUT::Mg, 24.}, {1, SpLUT::H2O, 18.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[7] = mk_gsp_info_entry_helper_( + names[7] = "MgO_dust"; + species_info[7] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::MgO_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::MgO_dust, - /* name = */ "MgO_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1500.0, /* bulk_density_cgs = */ 3.58157, @@ -245,10 +240,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( {1, SpLUT::Fe, 56.}, {1, SpLUT::S, 32.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[8] = mk_gsp_info_entry_helper_( + names[8] = "FeS_dust"; + species_info[8] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::FeS_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::FeS_dust, - /* name = */ "FeS_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 680.0, /* bulk_density_cgs = */ 4.87265, @@ -260,10 +254,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( {2, SpLUT::Al, 27.}, {3, SpLUT::H2O, 18.}, GRIMPL_INGREDIENT_LIST_SENTINEL}; - out.species_info[9] = mk_gsp_info_entry_helper_( + names[9] = "Al2O3_dust"; + species_info[9] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::Al2O3_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::Al2O3_dust, - /* name = */ "Al2O3_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 1500.0, /* bulk_density_cgs = */ 4.01610, @@ -277,10 +270,9 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( // nominal growth rxn: "0.5CO + 0.5CH2 + 1.2N -> ref_org_dust" // nuclide ratios: C:H:O:N = 1:1:0.5:1.2 - out.species_info[10] = mk_gsp_info_entry_helper_( + names[10] = "ref_org_dust"; + species_info[10] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::ref_org_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::ref_org_dust, - /* name = */ "ref_org_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 575.0, /* bulk_density_cgs = */ 1.5, @@ -288,27 +280,34 @@ grackle::impl::GrainSpeciesInfo grackle::impl::new_GrainSpeciesInfo( // nominal growth rxn: "CO + 2H2I -> vol_org_dust" // effective formula: CH3OH - out.species_info[11] = mk_gsp_info_entry_helper_( + names[11] = "vol_org_dust"; + species_info[11] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::vol_org_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::vol_org_dust, - /* name = */ "vol_org_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 375.0, /* bulk_density_cgs = */ 1.0, /* growth_ingredients = */ nullptr); // nominal growth rxn: "H2O -> H2O_ice_dust" - out.species_info[12] = mk_gsp_info_entry_helper_( + names[12] = "H2O_ice_dust"; + species_info[12] = mk_gsp_info_entry_helper_( /* species_idx = */ SpLUT::H2O_ice_dust, - /* onlygrainsp_idx = */ OnlyGrainSpLUT::H2O_ice_dust, - /* name = */ "H2O_ice_dust", /* h2dust_uses_carbonaceous_table = */ false, /* sublimation_temperature = */ 153.0, /* bulk_density_cgs = */ 0.92, /* growth_ingredients = */ nullptr); } - return out; + GrainSpeciesInfo out{ + n_species, species_info, + new_FrozenKeyIdxBiMap(names, n_species, BiMapMode::COPIES_KEYDATA)}; + + if (FrozenKeyIdxBiMap_is_ok(&out.name_map)) { + return out; + } else { + drop_GrainSpeciesInfo(&out); + return mk_invalid_GrainSpeciesInfo(); + } } #undef GRIMPL_INGREDIENT_LIST_SENTINEL diff --git a/src/clib/dust/grain_species_info.hpp b/src/clib/dust/grain_species_info.hpp index 1f20c52ea..e64a59b06 100644 --- a/src/clib/dust/grain_species_info.hpp +++ b/src/clib/dust/grain_species_info.hpp @@ -13,6 +13,8 @@ #ifndef GRAIN_SPECIES_INFO_HPP #define GRAIN_SPECIES_INFO_HPP +#include "../support/FrozenKeyIdxBiMap.hpp" + namespace grackle::impl { /// holds information about a single gas species that is an ingredient for @@ -39,19 +41,6 @@ struct GrainSpeciesInfoEntry { /// the species index of the grain in the #GrainSpLUT lookup table int species_idx; - /// the species index of the grain in the #OnlyGrainSpLUT lookup table - /// - /// @note - /// It's frankly a little redundant to track this information (since an - /// instance of this struct is found at this index of an out.species_infoay) - int onlygrainsp_idx; - - /// name of the dust species - /// - /// @note - /// This primarily exists for debuging purposes - const char* name; - /// indicates whether to use the carbonaceous or silicate coefficient table /// to computing contributions of the grain species to the total h2dust rate /// (or the rate of H2 formation) @@ -92,17 +81,17 @@ struct GrainSpeciesInfoEntry { /// Relationship with OnlyGrainSpLUT /// -------------------------------- /// In the short term, the index of each species in the -/// @ref GrainSpeciesInfo::species_info out.species_infoay is dictated by the +/// @ref GrainSpeciesInfo::species_info out.species_info is dictated by the /// order of enumerators in the OnlyGrainSpLUT enumeration. /// /// In the medium term, we plan to entirely eliminate the OnlyGrainSpLUT -/// enumeration because all of the grain species can be treated very uniformly -/// uniformly. At the time of writing, just about every place where we would -/// use OnlyGrainSpLUT corresponds to a location where would enumerate every +/// enumeration because all of the grain species can be treated very uniformly. +/// At the time of writing, just about every place where we would use +/// OnlyGrainSpLUT corresponds to a location where we would enumerate every /// possible grain species and perform nearly identical operations on each /// species. In each case, it is straight-forward to replace these blocks of /// logic with for-loops (we just need to encode species-specific variations in -/// the calculations in out.species_infoays that have the same ordering as the +/// the calculations in out.species_info that have the same ordering as the /// species). To phrase it another way, in nearly all of the places where we /// would use OnlyGrainSpLUT, we don't need to know the grain species identity. /// @@ -113,9 +102,18 @@ struct GrainSpeciesInfo { /// number of grain species considered for the current Grackle configuration int n_species; - /// an out.species_infoay of length of length @ref n_species where each entry + /// an out.species_info of length of length @ref n_species where each entry /// holds info about a separate grain species GrainSpeciesInfoEntry* species_info; + + /// maps between grain species names and the associated index. The mapping is + /// **ALWAYS** consistent with ``OnlyGrainSpLUT``. + /// + /// @note + /// An argument could be made for storing this separately from the rest of + /// the struct since the core grackle calculations don't (or at least + /// shouldn't) use this data structure during the calculation. + FrozenKeyIdxBiMap name_map; }; /// return the number of grain species @@ -164,6 +162,7 @@ inline void drop_GrainSpeciesInfo(GrainSpeciesInfo* ptr) { } } delete[] ptr->species_info; + drop_FrozenKeyIdxBiMap(&ptr->name_map); // the following 2 lines are not strictly necessary, but they may help us // avoid a double-free and a dangling pointer ptr->n_species = 0; diff --git a/src/clib/support/FrozenKeyIdxBiMap.hpp b/src/clib/support/FrozenKeyIdxBiMap.hpp new file mode 100644 index 000000000..3057d6336 --- /dev/null +++ b/src/clib/support/FrozenKeyIdxBiMap.hpp @@ -0,0 +1,472 @@ +//===----------------------------------------------------------------------===// +// +// See the LICENSE file for license and copyright information +// SPDX-License-Identifier: NCSA AND BSD-3-Clause +// +//===----------------------------------------------------------------------===// +/// +/// @file +/// Declares the internal FrozenKeyIdxBiMap type +/// +//===----------------------------------------------------------------------===// +#ifndef SUPPORT_FROZENKEYIDXBIMAP_HPP +#define SUPPORT_FROZENKEYIDXBIMAP_HPP + +#include +#include +#include + +#include "status_reporting.h" +#include "FrozenKeyIdxBiMap_detail.hpp" + +namespace grackle::impl { + +// the following doxygen comment block logically groups every all parts of +// the (internal) API for Grackle's (internal) FrozenKeyIdxBiMap. It's useful +// when generating a doxygen webpage + +/// @defgroup bimap-grp FrozenKeyIdxBiMap Data Type +/// +/// FrozenKeyIdxBiMap provides specialized mapping functionality for internal +/// use within Grackle. The functionality is useful as a building-block for +/// runtime lookup-tables and other data types with map-like interface. +/// +/// The data type was implemented in a C-style. The FrozenKeyIdxBiMap struct +/// should be treated as an opaque type that is operated upon by a set of +/// associated functions. More idiomatic C++ (or languages like Rust & Swift), +/// the associated functions would be attached to the struct as methods +/** @{*/ + +/// describes the operating modes of @ref FrozenKeyIdxBiMap +enum class BiMapMode { + /// The preferred default mode, where the creation of a BiMap involves making + /// copies of each key (cleaning up a BiMap will deallocate the copies) + /// + /// In general, this is much safer, and it will be @b very useful in the + /// longer-term if we allow dynamic extension of chemistry networks. If we + /// adopt the embedded-key optimization (discussed in the FrozenKeyIdxBiMap), + /// this mode will probably be significantly faster. + COPIES_KEYDATA = 1, + /// This mode aims to reduce memory usage by having the BiMap reference + /// external keys. In other words, the BiMap won't attempt to manage + /// allocations holding each character in a string. + /// + /// @warning + /// For safety this should @b ONLY be used when all keys are immutable + /// string-literals (i.e. when the strings are valid for program's duration) + REFS_KEYDATA = 0, +}; + +/// @brief A bidirectional map (bimap), specialized to map @c n unique string +/// keys to unique indexes with values of @c 0 through @c (n-1) and +/// vice versa. The ordering & values of keys are set at creation and frozen. +/// +/// This type is useful in a number of scenarios. For example, it can be used +/// to implement a type representing a Map of arrays (where the values could +/// be part of a single contiguous array or are individual arrays). +/// +/// This type operates in 2 modes: @ref BiMapMode::COPIES_KEYDATA and +/// @ref BiMapMode::REFS_KEYDATA. Their docstrings provide more context. When +/// in doubt, prefer the former mode. +/// +/// @par Implementation Notes +/// At the time of writing, the type is primarily implemented in terms of +/// a hash table that uses open-addressing with linear probing to resolve +/// collisions. The implementation heavily draws from logic I wrote for Enzo-E: +/// https://github.com/enzo-project/enzo-e/blob/main/src/Cello/view_StringIndRdOnlyMap.hpp +/// (More details are provided below under C++ considerations) +/// +/// @par Why Frozen? +/// The contents are "frozen" for 3 primary reasons: +/// 1. It drastically simplifies the implementation (we don't have to worry +/// about deletion -- which can be quite messy) +/// 2. Linear-probing generally provides better data locality than other hash +/// collision resolution techniques, but generally has other drawbacks. +/// Freezing the contents lets us mitigate many drawbacks (mostly related to +/// the deletion operation) +/// 3. It could let us make copy operations cheaper. If we know the map won't +/// change, we could just use reference counting. +/// +/// @par Consideration: Reference Counting +/// The original C++ leverages @c std::shared_ptr to achieve reference counting +/// (and reduce the cost of copying). Theoretically, I would like to see us use +/// some kind of reference-counting too. But this is tricky in library code, +/// given the diversity of threading libraries that are not formally +/// interoperable. I think the only way to properly do this would be to come up +/// with a system for allowing registration of locks/atomics with Grackle as a +/// whole. +/// +/// @par C++ Considerations +/// It would definitely be worth evaluating whether we should embrace C++ +/// in order to convert this to a full-blown class and adopt characteristics +/// present in the original Enzo-E version: +/// - most importantly, it would greatly reduce the chance of memory leaks +/// - (much less importantly) it would be a lot more ergonomic (& less clunky) +/// - But, for reasons expressed above, I am concerned about using +/// @c std::shared_ptr for reference counting. +/// +/// @par +/// I would be stunned if std::map or +/// std::map is faster than the internal +/// hash table since @c std::map is usually implemented as a tree. +/// +/// @par Potential Improvements +/// Simple Ideas: +/// - We could be smarter about the order that we insert keys into the table +/// (in the constructor) to minimize the search time. +/// - We might be able to come up with a better hash function +/// +/// @par +/// A more ambitious idea is to embed string allocations within the rows for +/// @ref BiMapMode::COPIES_KEYDATA mode. This is possible thanks to the fact +/// that we use @ref bimap_detail::KEYLEN_MAX to limit the size of keys. +/// - Essentially, we would replace @ref bimap_StrU16_detail::Row with +/// something like the following: +/// @code{.cpp} +/// struct alignas(32) PackedRow { char data[32]; }; +/// +/// bool is_empty(PackedRow& r) { return data[0] == '\0' } +/// const char* get_key(PackedRow r) { return r.data; } +/// uint16_t get_val(Packed r) { +/// stdd::uint16_t o; +/// std::memcpy(&o, r.data+30, 2); +/// return o; +/// } +/// @endcode +/// - additional context about the preceding snippet: +/// - when empty, a @c PackedRow::data is filled with '\0' +/// - otherwise, @c PackedRow::data encodes the key-value pair: +/// - data[0:30] is the null-terminated key string ('\0' fills unused space) +/// - data[30:32] encodes the 16-bit value +/// - @c alignas(32) primarily ensures better cacheline alignment. +/// - Benefits of this change: +/// 1. better locality (if @c PackedRow is in the cache, so is the key-string) +/// 2. probing can use memcmp without a checking whether a row is empty +/// 3. with a little extra care, we could use the forced alignment of +/// @c PackedRow::data to compare strings with SIMD instructions +/// +/// @note +/// The contents of this struct should be considered an implementation +/// detail! Always prefer the associated functions (they are defined in such +/// a way that they should be inlined) +struct FrozenKeyIdxBiMap { + // don't forget to update FrozenKeyIdxBiMap_clone when changing members + + /// the number of contained strings + bimap_detail::rowidx_type length; + /// the number of elements in table_rows + bimap_detail::rowidx_type capacity; + /// max number of rows that must be probed to determine if a key is contained + bimap_detail::rowidx_type max_probe; + /// specifies ownership of keys, @see BiMapMode + BiMapMode mode; + + /// actual hash table data + bimap_StrU16_detail::Row* table_rows; + /// tracks the row indices to make iteration faster + bimap_detail::rowidx_type* ordered_row_indices; +}; + +/// Create an invalid FrozenKeyIdxBiMap +/// +/// @note +/// ugh, it's unfortunate that we need to make this... but for now it's useful. +/// Ideally, we would refactor so that we can get rid of this function. A +/// useful compromise might simply put it within the bimap_detail namespace +inline FrozenKeyIdxBiMap mk_invalid_FrozenKeyIdxBiMap() { + return FrozenKeyIdxBiMap{bimap_detail::INVALID_VAL, + bimap_detail::INVALID_VAL, + 0, + BiMapMode::REFS_KEYDATA, + nullptr, + nullptr}; +} + +/// Constructs a new FrozenKeyIdxBiMap +/// +/// @param[in] keys Sequence of 1 or more unique strings. Each string must +/// include at least 1 non-null character and be null-terminated +/// @param[in] key_count The length of keys +/// @param[in] mode specifies handling of keys. This will be passed on to any +/// clones that are made. +/// +/// @note +/// Callers should pass the returned value to @ref FrozenKeyIdxBiMap_is_ok +/// to check whether there was an error during creation. This is pretty +/// ugly/clunky, but it's the only practical way to achieve comparable behavior +/// to other internal data types. The best alternatives involve things like +/// std::optional or converting this type to a simple C++ class. +FrozenKeyIdxBiMap new_FrozenKeyIdxBiMap(const char* const keys[], int key_count, + BiMapMode mode); + +/// checks whether a creational function produced a valid bimap +/// +/// @param[in] ptr Points to the object being checked +/// @return true if the value is ok or false if the value is invalid +/// +/// @important +/// The interface of @ref FrozenKeyIdxBiMap sets values in a very particular +/// way to signal that FrozenKeyIdxBiMap is in an invalid state. This function +/// @b ONLY checks for that particular signature. +inline bool FrozenKeyIdxBiMap_is_ok(const FrozenKeyIdxBiMap* ptr) { + return ptr->length != bimap_detail::INVALID_VAL; +} + +/// Destroys the internal data tracked by an instance +/// +/// @param[in] ptr A non-null pointer to a valid bimap instance +/// +/// @warning +/// As with any C datatype, care is required to avoid issues with internal +/// dangling pointers. YOU SHOULD ONLY CALL THIS ONCE for a given instance +/// (and only if the instance was properly by the interface) +/// - while some efforts are made to reduce the possiblity of issues, some +/// things just can't be avoided (especially when it comes to shallow copies) +/// - here's a problematic example: +/// @code{.cpp} +/// FrozenKeyIdxBiMap bimap = new_FrozenKeyIdxBiMap( /**/ ); +/// // (the FrozenKeyIdxBiMap_is_ok check is elided for brevity) +/// +/// // you should generally avoid shallow copies (if possible) +/// FrozenKeyIdxBiMap shallow_cpy = bimap; +/// +/// // problems arise below (if we swap order, the 2nd call is still bad) +/// drop_FrozenKeyIdxBiMap(&shallow_cpy); // <- this is OK +/// drop_FrozenKeyIdxBiMap(&bimap); // <- this is BAD +/// @endcode +inline void drop_FrozenKeyIdxBiMap(FrozenKeyIdxBiMap* ptr) { + if (FrozenKeyIdxBiMap_is_ok(ptr)) { + if (ptr->length > 0) { + if (ptr->mode == BiMapMode::COPIES_KEYDATA) { + for (bimap_detail::rowidx_type i = 0; i < ptr->capacity; i++) { + bimap_StrU16_detail::Row* row = ptr->table_rows + i; + // casting from (const char*) to (char*) should be legal (as long as + // there were no bugs modifying the value of ptr->mode) + if (row->keylen > 0) { + delete[] row->key; + } + } + } + delete[] ptr->table_rows; + delete[] ptr->ordered_row_indices; + } // ptr->length > 0 + (*ptr) = mk_invalid_FrozenKeyIdxBiMap(); + } +} + +/// Makes a clone of the specified FrozenKeyIdxBiMap +/// +/// The clone inherits the original's BiMapMode value. If it held +/// BiMapMode::COPIES_KEYDATA, then fresh copies of the strings are made +/// +/// @note +/// Callers should pass the returned value to @ref FrozenKeyIdxBiMap_is_ok +/// to check whether there was an error during creation. This is pretty +/// ugly/clunky, but it's the only practical way to achieve comparable behavior +/// to other internal data types. The best alternatives involve things like +/// std::optional or converting this type to a simple C++ class. +FrozenKeyIdxBiMap FrozenKeyIdxBiMap_clone(const FrozenKeyIdxBiMap* ptr); + +namespace bimap { + +/// holds the result of a call to @ref FrozenKeyIdxBiMap_find +/// +/// @note +/// This is a C-style approximation of std::optional. Additionally, +/// the choice to make value a uint16_t is motivated by PR #484 +struct AccessRslt { + /// Indicates whether the value member is valid + bool has_value; + /// the loaded value (if has_value is false then this holds garbage) + uint16_t value; +}; + +} // namespace bimap + +/// lookup the value associated with the key +/// +/// This is the analog to calling `map[key]` in python. +/// +/// @param[in] map A pointer to a valid bimap +/// @param[in] key A null-terminated string +/// +/// @return An instance of @ref bimap::AccessRslt that encodes the value (if +/// the key is present) +inline bimap::AccessRslt FrozenKeyIdxBiMap_find(const FrozenKeyIdxBiMap* map, + const char* key) { + uint16_t tmp = bimap_StrU16_detail::search(map->table_rows, key, + map->capacity, map->max_probe) + .val; + return bimap::AccessRslt{tmp != bimap_detail::INVALID_VAL, tmp}; +} + +/// returns whether the map contains the key +/// +/// @param[in] map A pointer to a valid bimap +/// @param[in] key A null-terminated string +inline bool FrozenKeyIdxBiMap_contains(const FrozenKeyIdxBiMap* map, + const char* key) { + return FrozenKeyIdxBiMap_find(map, key).has_value; +} + +/// return the number of keys in the map +/// +/// @param[in] map A pointer to a valid bimap +inline int FrozenKeyIdxBiMap_size(const FrozenKeyIdxBiMap* map) { + return map->length; +} + +/// Return the key associated with the specified value +/// +/// For some context, if this function returns a string `s` for some index `i`, +/// then a call to @ref FrozenKeyIdxBiMap_find that passes `s` will +/// return `i` +/// +/// This is intended for use in situations where you briefly need the string +/// (i.e. and you plan to stop using the pointer before or at the same time as +/// the @p map is destroyed). In more detail: +/// - If the @p map was constructed in @ref BiMapMode::COPIES_KEYDATA mode, +/// returned strings have the same lifetime as @p map (i.e. they are +/// deallocated when the contents of @p map are deallocated). +/// - Otherwise, the returned string's allocation is externally managed. But, +/// any scenario where the allocation doesn't live at least as long as @p map, +/// is ill-formed +/// +/// @param[in] map A pointer to a valid bimap +/// @param[in] idx The index to check +/// @return The pointer to the appropriate key +inline const char* FrozenKeyIdxBiMap_inverse_find(const FrozenKeyIdxBiMap* map, + uint16_t idx) { + if (idx >= map->length) { + return nullptr; + } + const char* out = map->table_rows[map->ordered_row_indices[idx]].key; + GR_INTERNAL_REQUIRE(out != nullptr, "logical error: string can't be nullptr"); + return out; +} + +/** @}*/ // end of group + +namespace bimap_detail { + +/// a helper function used to actually allocate memory for FrozenKeyIdxBiMap +inline FrozenKeyIdxBiMap alloc(uint16_t length, uint16_t capacity, + BiMapMode mode) { + // it would be nice to handle allocate all pointers as a single block of + // memory, but that gets tricky. Essentially, we would allocate uninitialized + // memory and manually use placement-new (and the corresponding `delete`) + using bimap_detail::rowidx_type; + using bimap_StrU16_detail::Row; + FrozenKeyIdxBiMap out = { + /*length=*/length, + /*capacity=*/capacity, + /*max_probe=*/capacity, + /*mode=*/mode, + /*table_rows=*/(capacity > 0) ? new Row[capacity] : nullptr, + /*ordered_row_indices=*/(length > 0) ? new rowidx_type[length] : nullptr}; + for (uint16_t i = 0; i < capacity; i++) { + out.table_rows[i].keylen = 0; + } + return out; +} + +} // namespace bimap_detail + +inline FrozenKeyIdxBiMap new_FrozenKeyIdxBiMap(const char* const keys[], + int key_count, BiMapMode mode) { + int64_t max_len = static_cast(bimap_cap_detail::max_key_count()); + if (keys == nullptr && key_count == 0) { + return bimap_detail::alloc(0, 0, mode); + } else if (keys == nullptr) { + GrPrintErrMsg("keys must not be a nullptr"); + return mk_invalid_FrozenKeyIdxBiMap(); + } else if (key_count < 1 || static_cast(key_count) > max_len) { + GrPrintErrMsg("key_count must be positive & can't exceed %lld", max_len); + return mk_invalid_FrozenKeyIdxBiMap(); + } + + // based on the preceding check, this shouldn't be able to fail + bimap_detail::rowidx_type capacity = + bimap_cap_detail::calc_map_capacity(key_count); + GR_INTERNAL_REQUIRE(capacity > 0, "something went wrong"); + + // let's validate the keys + for (int i = 0; i < key_count; i++) { + GR_INTERNAL_REQUIRE(keys[i] != nullptr, "Can't specify a nullptr key"); + std::size_t n_chrs_without_nul = std::strlen(keys[i]); + if (n_chrs_without_nul == 0 || + n_chrs_without_nul > bimap_detail::KEYLEN_MAX) { + GrPrintErrMsg( + "calling strlen on \"%s\", the key @ index %d, yields 0 or a length " + "exceeding %d", + keys[i], i, bimap_detail::KEYLEN_MAX); + return mk_invalid_FrozenKeyIdxBiMap(); + } + // check uniqueness + for (int j = 0; j < i; j++) { + if (strcmp(keys[i], keys[j]) == 0) { + GrPrintErrMsg("\"%s\" key repeats", keys[i]); + return mk_invalid_FrozenKeyIdxBiMap(); + } + } + } + + // now, that we know we will succeed, lets construct the bimap + FrozenKeyIdxBiMap out = bimap_detail::alloc(key_count, capacity, mode); + + // now it's time to fill in the array + int max_probe_count = 1; + for (int i = 0; i < key_count; i++) { + // search for the first empty row + bimap_StrU16_detail::SearchRslt search_rslt = bimap_StrU16_detail::search( + out.table_rows, keys[i], capacity, capacity); + // this should be infallible (especially after we already did some checks) + GR_INTERNAL_REQUIRE(search_rslt.probe_count != 0, "sanity check failed"); + + // now we overwrite the row + bimap_StrU16_detail::overwrite_row(out.table_rows + search_rslt.rowidx, + keys[i], std::strlen(keys[i]), i, + mode == BiMapMode::COPIES_KEYDATA); + out.ordered_row_indices[i] = search_rslt.rowidx; + + max_probe_count = std::max(max_probe_count, search_rslt.probe_count); + } + out.max_probe = max_probe_count; + + return out; +} + +inline FrozenKeyIdxBiMap FrozenKeyIdxBiMap_clone(const FrozenKeyIdxBiMap* ptr) { + FrozenKeyIdxBiMap out = + bimap_detail::alloc(ptr->length, ptr->capacity, ptr->mode); + out.max_probe = ptr->max_probe; + + if (ptr->length == 0 || !FrozenKeyIdxBiMap_is_ok(ptr)) { + return out; + } + + // give the compiler/linter a hint that out.table_rows is not a nullptr + // (this is guaranteed by the preceding early exit) + GR_INTERNAL_REQUIRE( + (out.table_rows != nullptr) && (out.ordered_row_indices != nullptr), + "something is very wrong!"); + + bool copy_key_data = out.mode == BiMapMode::COPIES_KEYDATA; + for (bimap_detail::rowidx_type i = 0; i < ptr->capacity; i++) { + const bimap_StrU16_detail::Row& ref_row = ptr->table_rows[i]; + if (ref_row.keylen > 0) { + bimap_StrU16_detail::overwrite_row(out.table_rows + i, ref_row.key, + ref_row.keylen, ref_row.value, + copy_key_data); + } + } + + for (bimap_detail::rowidx_type i = 0; i < ptr->length; i++) { + out.ordered_row_indices[i] = ptr->ordered_row_indices[i]; + } + return out; +}; + +} // namespace grackle::impl + +#endif // SUPPORT_FROZENKEYIDXBIMAP_HPP diff --git a/src/clib/support/FrozenKeyIdxBiMap_detail.hpp b/src/clib/support/FrozenKeyIdxBiMap_detail.hpp new file mode 100644 index 000000000..24cacd4b9 --- /dev/null +++ b/src/clib/support/FrozenKeyIdxBiMap_detail.hpp @@ -0,0 +1,199 @@ +//===----------------------------------------------------------------------===// +// +// See the LICENSE file for license and copyright information +// SPDX-License-Identifier: NCSA AND BSD-3-Clause +// +//===----------------------------------------------------------------------===// +/// +/// @file +/// Defines the hash table machinery used to implement the internals of the +/// FrozenKeyIdxBiMap type +/// +//===----------------------------------------------------------------------===// +#ifndef SUPPORT_FROZENKEYIDXBIMAP_DETAIL_HPP +#define SUPPORT_FROZENKEYIDXBIMAP_DETAIL_HPP + +#include // std:::min +#include +#include // std::memcmp +#include +#include "status_reporting.h" + +#include "fnv1a_hash.hpp" + +namespace grackle::impl { + +/// This namespace holds a generic constants and typedefs +namespace bimap_detail { +/// specifies an invalid value of the map +/// +/// In other words, a map must have fewer entries than the maximum possible u16 +/// value +inline constexpr uint16_t INVALID_VAL = std::numeric_limits::max(); + +/// specifies maximum allowed length of a key (excluding the null terminator). +/// +/// @note +/// While the value may seem low, it's probably large enough. Restricting +/// strings to 30 elements (including the terminator) allows for a hypothetical +/// optimization (see the @ref FrozenKeyIdxBiMap docstring for info). +inline constexpr uint16_t KEYLEN_MAX = 29; + +/// the type used for indexing the rows of the BiMap's internal hash table +/// +/// @note +/// This does not necessarily need to be a uint16_t (even if the internal +/// values of the hash table are uint16_t). It just needs to be able to hold +/// the maximum possible value +typedef uint16_t rowidx_type; + +} // namespace bimap_detail + +// ----------------------------------------------------------------- + +/// Encloses code and logic pertaining to the capacity of a FrozenKeyIdxBiMap +namespace bimap_cap_detail { + +/// the load factor specifies the fraction of the capacity of the Hash +/// table that is filled. This should be an integer. +/// +/// Generally, the larger this is, the fewer collisions there are, but the more +/// memory is required. Lookups probably get slower if it's too big +inline constexpr int INVERSE_LOAD_FACTOR = 2; +static_assert(INVERSE_LOAD_FACTOR > 1); + +/// list of allowed capacities. These are all prime numbers that +/// have nearly constant linear spacing (it may make more sense to have +/// logarithmic spacing) +inline constexpr uint32_t CAPACITIES_LIST[] = { + // each number increases by ~10 in this first batch + 7, 19, 31, 41, 53, 61, 71, 83, 89, 101, 113, 127, 139, 149, 163, 173, 181, + 191, 199, 211, 223, 233, 241, 251, + // probably won't use these (so we increase spacing: + 293, 401, 503, 601, 701, 797, 907, 997}; + +inline constexpr int N_CAPACITIES = sizeof(CAPACITIES_LIST) / sizeof(uint32_t); + +/// compute the maximum number of keys +inline bimap_detail::rowidx_type max_key_count() { + return std::min( + std::numeric_limits::max(), + CAPACITIES_LIST[N_CAPACITIES - 1] / INVERSE_LOAD_FACTOR); +} + +/// compute the capacity of the map (a value of 0 indicates that a large enough +/// capacity can't be found) +/// +/// @param key_count the desired number of keys (should be positive) +inline uint16_t calc_map_capacity(int key_count) { + uint64_t c = INVERSE_LOAD_FACTOR * static_cast(key_count); + for (int i = 0; i < N_CAPACITIES; i++) { // binary search may be faster + if (c < CAPACITIES_LIST[i]) { + return static_cast(CAPACITIES_LIST[i]); + } + } + return 0; +} + +} // namespace bimap_cap_detail + +// ----------------------------------------------------------------- + +/// Holds machinery for hash tablea that implement FrozenKeyIdxBiMap +/// +/// This machinery is specialized for (string, u16) key-value pairs +namespace bimap_StrU16_detail { + +/// entry in a hash table +/// +/// This acts as a (key,value) pair with a little extra metadata. A hash table +/// is fundamentally an array of these instances +/// +/// @note members are ordered to minimize the struct size (i.e. smallest members +/// listed first) to pack as many entries into a cacheline as possible +struct Row { + /// specifies the value associated with the current key + uint16_t value; + + /// specifies the length of the key (not including the '\0') + /// + /// @note Tracked for short-circuiting comparisons (while probing collisions) + uint16_t keylen; + /// identifies the address of this entry's key + const char* key; +}; + +static void overwrite_row(Row* row, const char* key, uint16_t keylen, + uint16_t value, bool copy_key_data) { + GR_INTERNAL_REQUIRE(row->keylen == 0, "Sanity check failed!"); + row->value = value; + row->keylen = keylen; + const char* key_ptr = key; + if (copy_key_data) { + std::size_t total_len = keylen + 1; // <- add 1 to account for '\0' + char* ptr = new char[total_len]; + std::memcpy(ptr, key, total_len); + key_ptr = ptr; + } + *row = Row{value, keylen, key_ptr}; +} + +/// represents the result of an internal search for a key +/// +/// @note +/// As a rule of thumb, it's generally better (for compiler optimization) to +/// return a struct of integers than rely on modifying pointer arguments +struct SearchRslt { + /// specifies value found by the search (or @ref bimap_detail::INVALID_VAL) + uint16_t val; + /// specified the number of probes before the search returned + int probe_count; + /// specify the index of the "row" corresponding to the search result + int rowidx; +}; + +/// Search for the row matching key. The search ends when a match is found, an +/// an empty row is found, or the function has probed `max_probe` entries +/// +/// @param rows an array of rows to search to be compared +/// @param key the key to be compared +/// @param capacity the length of the rows array +/// @param max_probe the maximum number of rows to check before giving up +/// +/// @important +/// The behavior is undefined if @p key is a @c nullptr, @p keylen is 0, or +/// @p keylen exceeds @p bimap::keylen +/// +/// @note +/// This is declared as `static inline` to facilitate inlining within +/// FrozenKeyIdxBiMap's interface API. +inline SearchRslt search(const Row* rows, const char* key, int capacity, + int max_probe) { + GR_INTERNAL_REQUIRE(key != nullptr, "Major programming oversight"); + max_probe = (max_probe <= 0 || max_probe > capacity) ? capacity : max_probe; + + HashRsltPack h = fnv1a_hash(key); + int i = -1; // <- set to a dummy value + int launched_probes = 0; + if (h.keylen > 0 && h.success && max_probe > 0) { + int guess_i = static_cast(h.hash % capacity); // <- initial guess + + do { // circularly loop over rows to search for key (start at guess_i) + i = (guess_i + launched_probes) % capacity; + launched_probes++; // <- about to perform a new probe + const Row& r = rows[i]; + + if (r.keylen == h.keylen && std::memcmp(r.key, key, h.keylen) == 0) { + return SearchRslt{r.value, launched_probes, i}; // match found! + } + + // check if rows[i] is empty or if we have hit the limit on searches + } while (rows[i].keylen != 0 && launched_probes < max_probe); + } + + return SearchRslt{bimap_detail::INVALID_VAL, launched_probes, i}; +} + +} // namespace bimap_StrU16_detail +} // namespace grackle::impl +#endif // SUPPORT_FROZENKEYIDXBIMAP_DETAIL_HPP diff --git a/src/clib/support/fnv1a_hash.hpp b/src/clib/support/fnv1a_hash.hpp new file mode 100644 index 000000000..6aafca022 --- /dev/null +++ b/src/clib/support/fnv1a_hash.hpp @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// See the LICENSE file for license and copyright information +// SPDX-License-Identifier: NCSA AND BSD-3-Clause +// +//===----------------------------------------------------------------------===// +/// +/// @file +/// implements the 32-bit fnv-1a hash function +/// +//===----------------------------------------------------------------------===// +#ifndef SUPPORT_FNV1A_HASH_HPP +#define SUPPORT_FNV1A_HASH_HPP + +#include +#include + +namespace grackle::impl { + +/// Holds the result of a call to fnv1a_hash +struct HashRsltPack { + bool success; + std::uint16_t keylen; + std::uint32_t hash; +}; + +/// calculate 32-bit FNV-1a hash of key and measures the key's length. +/// +/// @tparam MaxKeyLen the max number of characters in key (excluding '\0'). By +/// default, it's the largest value HashRsltPack::keylen holds. A smaller +/// value can be specified as an optimization. +/// @param key the null-terminated string. Behavior is deliberately undefined +/// when passed a `nullptr` +/// +/// @note +/// The current implementation prioritizes convenience. We may want to evaluate +/// whether alternatives (e.g. fxhash) are faster or have fewer collisions with +/// our typical keys. +/// +/// @warning +/// Obviously this is @b NOT cryptographically secure +template ::max()> +HashRsltPack fnv1a_hash(const char* key) { + static_assert( + 0 <= MaxKeyLen && MaxKeyLen <= std::numeric_limits::max(), + "MaxKeyLen can't be encoded by HashRsltPack"); + + constexpr std::uint32_t prime = 16777619; + constexpr std::uint32_t offset = 2166136261; + + std::uint32_t hash = offset; + for (int i = 0; i <= MaxKeyLen; i++) { // the `<=` is intentional + if (key[i] == '\0') { + return HashRsltPack{true, static_cast(i), hash}; + } + hash = (hash ^ key[i]) * prime; + } + return HashRsltPack{false, 0, 0}; +} + +} // namespace grackle::impl + +#endif // SUPPORT_FNV1A_HASH_HPP diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index f1018e3ca..f28d17f69 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -48,6 +48,11 @@ target_compile_definitions(grtest_utils # start declaring targets for tests # --------------------------------- +add_executable(testFrozenKeyIdxBiMap + test_unit_hash.cpp test_unit_FrozenKeyIdxBiMap.cpp) +target_link_libraries(testFrozenKeyIdxBiMap testdeps GTest::gmock) +gtest_discover_tests(testFrozenKeyIdxBiMap) + add_executable(runInterpolationTests test_unit_interpolators_g.cpp) target_link_libraries(runInterpolationTests testdeps) diff --git a/tests/unit/test_grain_species_info.cpp b/tests/unit/test_grain_species_info.cpp index 19bd87f4e..f48ed85ef 100644 --- a/tests/unit/test_grain_species_info.cpp +++ b/tests/unit/test_grain_species_info.cpp @@ -17,6 +17,7 @@ #include "LUT.hpp" #include "dust/grain_species_info.hpp" +#include "support/FrozenKeyIdxBiMap.hpp" namespace { // stuff in an anonymous namespace is local to this file @@ -130,27 +131,48 @@ class GrainSpeciesInfoTest : public testing::TestWithParam { unique_GrainSpeciesInfo_ptr grain_species_info_; }; +// the following test is somewhat redundant with the SpeciesLUTCompare tests +// and is more work to maintain TEST_P(GrainSpeciesInfoTest, CheckOnlyGrainSpeciesLUTConsistency) { + // construct a vector with an element for each entry in OnlyGrainSpeciesLUT + std::vector ref_l{ + {"MgSiO3_dust", OnlyGrainSpLUT::MgSiO3_dust}, + {"AC_dust", OnlyGrainSpLUT::AC_dust}, + {"SiM_dust", OnlyGrainSpLUT::SiM_dust}, + {"FeM_dust", OnlyGrainSpLUT::FeM_dust}, + {"Mg2SiO4_dust", OnlyGrainSpLUT::Mg2SiO4_dust}, + {"Fe3O4_dust", OnlyGrainSpLUT::Fe3O4_dust}, + {"SiO2_dust", OnlyGrainSpLUT::SiO2_dust}, + {"MgO_dust", OnlyGrainSpLUT::MgO_dust}, + {"FeS_dust", OnlyGrainSpLUT::FeS_dust}, + {"Al2O3_dust", OnlyGrainSpLUT::Al2O3_dust}, + {"ref_org_dust", OnlyGrainSpLUT::ref_org_dust}, + {"vol_org_dust", OnlyGrainSpLUT::vol_org_dust}, + {"H2O_ice_dust", OnlyGrainSpLUT::H2O_ice_dust}, + }; + const int n_species = grain_species_info_->n_species; for (int i = 0; i < n_species; i++) { - // sanity check! - ASSERT_NE(grain_species_info_->species_info[i].name, nullptr); - // actual check! - EXPECT_EQ(i, grain_species_info_->species_info[i].onlygrainsp_idx) - << "element " << i << " of the GrainSpeciesInfo::species_info array " - << "doesn't seem to be synchronized with the OnlyGrainSpeciesLUT " - << "enumeration. At face value (there aren't other related bugs), " - << "OnlyGrainSpeciesLUT::" << grain_species_info_->species_info[i].name - << " seems to have a value of " - << grain_species_info_->species_info[i].onlygrainsp_idx; + const char* name = grackle::impl::FrozenKeyIdxBiMap_inverse_find( + &grain_species_info_->name_map, static_cast(i)); + + ASSERT_NE(name, nullptr); // sanity check! + EXPECT_EQ(i, ref_l[i].index); // sanity check! + + // actual check: + EXPECT_EQ(std::string(name), ref_l[i].name) + << "the grain species associated with index " << i << " in the " + << "GrainSpeciesInfo instance doesn't seem to be synchronized with " + << "the OnlyGrainSpeciesLUT enumeration."; } } TEST_P(GrainSpeciesInfoTest, SublimationTemperature) { const int n_species = grain_species_info_->n_species; for (int i = 0; i < n_species; i++) { - // sanity check! - ASSERT_NE(grain_species_info_->species_info[i].name, nullptr); + const char* name = grackle::impl::FrozenKeyIdxBiMap_inverse_find( + &grain_species_info_->name_map, static_cast(i)); + ASSERT_NE(name, nullptr); // sanity check! // actual check! EXPECT_GT(grain_species_info_->species_info[i].sublimation_temperature, 0) << "element " << i << " of the GrainSpeciesInfo::species_info array " @@ -165,10 +187,11 @@ TEST_P(GrainSpeciesInfoTest, SpeciesLUTCompare) { const int n_species = grain_species_info_->n_species; for (int i = 0; i < n_species; i++) { - // sanity check! - ASSERT_NE(grain_species_info_->species_info[i].name, nullptr); + const char* name_cstr = grackle::impl::FrozenKeyIdxBiMap_inverse_find( + &grain_species_info_->name_map, static_cast(i)); + ASSERT_NE(name_cstr, nullptr); // sanity check! // actual check! - std::string actual_name(grain_species_info_->species_info[i].name); + std::string actual_name(name_cstr); EXPECT_EQ(actual_name, ref_list[i].name) << "according to the reference list, the grain species at index " << i << " should be `" << ref_list[i].name << "` (not `" << actual_name @@ -194,17 +217,7 @@ INSTANTIATE_TEST_SUITE_P( // check the GrainSpeciesInfo object when constructed from dust_species // parameters that hold extreme values TEST(GrainSpeciesInfoTestMisc, DustSpeciesExtremeValues) { - { - unique_GrainSpeciesInfo_ptr ptr = make_unique_GrainSpeciesInfo(0); - EXPECT_EQ(ptr->n_species, 0) - << "GrainSpeciesInfo::n_species should be 0 when the dust_species " - << "parameter is 0."; - EXPECT_EQ(ptr->species_info, nullptr) - << "GrainSpeciesInfo::species_info should be a nullptr when " - << "dust_species parameter is 0."; - } - - int invalid_dust_species_values[2] = {-42423, MAX_dust_species_VAL + 1}; + int invalid_dust_species_values[3] = {-42423, 0, MAX_dust_species_VAL + 1}; for (auto dust_species_param : invalid_dust_species_values) { unique_GrainSpeciesInfo_ptr ptr = make_unique_GrainSpeciesInfo(dust_species_param); diff --git a/tests/unit/test_unit_FrozenKeyIdxBiMap.cpp b/tests/unit/test_unit_FrozenKeyIdxBiMap.cpp new file mode 100644 index 000000000..148c3270e --- /dev/null +++ b/tests/unit/test_unit_FrozenKeyIdxBiMap.cpp @@ -0,0 +1,377 @@ +//===----------------------------------------------------------------------===// +// +// See the LICENSE file for license and copyright information +// SPDX-License-Identifier: NCSA AND BSD-3-Clause +// +//===----------------------------------------------------------------------===// +/// +/// @file +/// tests the @ref grackle::impl::FrozenKeyIdxBiMap +/// +//===----------------------------------------------------------------------===// + +#include // needed to teach googletest how to print +#include +#include + +#include +#include +#include "support/FrozenKeyIdxBiMap.hpp" +#include "grackle.h" + +// teach GoogleTest how to print grackle::impl::bimap::AccessRslt for more +// informative errors (otherwise it just shows the memory's raw byte values) +namespace grackle::impl::bimap { +void PrintTo(const AccessRslt& ar, std::ostream* os) { + std::string tmp = (ar.has_value) ? std::to_string(ar.value) : ""; + *os << "{has_value=" << ar.has_value << ", value=" << tmp << '}'; +} +} // namespace grackle::impl::bimap + +std::string prep_descr(std::string descr, bool negation) { + return ((negation) ? "isn't " : "is ") + descr; +} + +// the following defines a custom matcher for checking whether the has_value +// member of an AccessRslt instance is false. Use via +// EXPECT_THAT(arg, EmptyAccessRslt) +MATCHER(EmptyAccessRslt, prep_descr("an empty AccessRslt", negation)) { + if (!arg.has_value) { + return true; + } + *result_listener << "holds the " << arg.value << " value"; + return false; +} + +// the following defines a custom matcher for checking whether the has_value +// member of an AccessRslt instance is true +// EXPECT_THAT(arg, AccessRsltHolding(value)) +MATCHER_P(AccessRsltHolds, v, + prep_descr("an AccessRslt holding " + std::to_string(v), negation)) { + if (!arg.has_value) { + *result_listener << "is empty"; + } else if (arg.value != v) { + *result_listener << " holds the value, " << v; + } + return arg.has_value && arg.value == v; +} + +// this top test was introduced to provide a more concrete example +// of how we might use FrozenKeyIdxBiMap + +TEST(FrozenKeyIdxBiMap, FullExample) { + // THE SCENARIO: we have a list of unique ordered strings + // + // We are going build a FrozenKeyIdxBiMap instance from the following list. + // The resulting object is a bidirectional map that can both: + // 1. map a string to its index (at the time of construction) in the list. + // - example: "HII" is mapped to 2 + // - example: "O2II" is mapped to 33 + // 2. perform the reverse mapping (i.e. index -> string) + // - example: 2 is mapped to "HII" + // - example: 33 is mapped to "O2II" + // + // It's worth emphasizing that the mapping is frozen when its constructed & + // contents can't be changed (even if you reorder the original) + const char* keys[34] = { + "e", "HI", "HII", "HeI", "HeII", "HeIII", "HM", "H2I", "H2II", + "DI", "DII", "HDI", "DM", "HDII", "HeHII", "CI", "CII", "CO", + "CO2", "OI", "OH", "H2O", "O2", "SiI", "SiOI", "SiO2I", "CH", + "CH2", "COII", "OII", "OHII", "H2OII", "H3OII", "O2II"}; + + namespace grimpl = grackle::impl; + + // PART 1: build a FrozenKeyIdxBiMap from this list + // the 3rd argument tells the string to make copies of each string + grimpl::FrozenKeyIdxBiMap m = grimpl::new_FrozenKeyIdxBiMap( + keys, 34, grimpl::BiMapMode::COPIES_KEYDATA); + + // before we use it, we should confirm the constructor succeeded + if (!grimpl::FrozenKeyIdxBiMap_is_ok(&m)) { + FAIL() << "creation of the m failed unexpectedly"; + } + + // PART 2: let's show some examples of lookups from names + + // Equivalent Python: `2 == m["HII"]` + EXPECT_THAT(grimpl::FrozenKeyIdxBiMap_find(&m, "HII"), + AccessRsltHolds(2)); // aka AccessRslt{has_value=true, value=2} + + // Equivalent Python/idiomatic C++: `33 == m["O2II"]` + EXPECT_THAT(grimpl::FrozenKeyIdxBiMap_find(&m, "O2II"), AccessRsltHolds(33)); + + // for unknown key, returns AccessRslt{has_value=false, value=} + EXPECT_THAT(grimpl::FrozenKeyIdxBiMap_find(&m, "Dummy"), EmptyAccessRslt()); + + // PART 3: let's show the reverse of the previous lookups + EXPECT_STREQ("HII", grimpl::FrozenKeyIdxBiMap_inverse_find(&m, 2)); + EXPECT_STREQ("O2II", grimpl::FrozenKeyIdxBiMap_inverse_find(&m, 33)); + + // Behavior is again well-defined when passing an invalid index + EXPECT_EQ(nullptr, grimpl::FrozenKeyIdxBiMap_inverse_find(&m, 131)); + + // PART 4: We can also query the length + EXPECT_EQ(34, grimpl::FrozenKeyIdxBiMap_size(&m)); + + // Finally, to cleanup we will deallocate data tracked internally by `m` + grimpl::drop_FrozenKeyIdxBiMap(&m); +} + +// validate basic operations for an empty bimap +TEST(FrozenKeyIdxBiMap, EmptyBasicOps) { + grackle::impl::FrozenKeyIdxBiMap m = grackle::impl::new_FrozenKeyIdxBiMap( + nullptr, 0, grackle::impl::BiMapMode::COPIES_KEYDATA); + ASSERT_TRUE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&m)) + << "construction of a FrozenKeyIdxBiMap unexpectedly failed"; + + EXPECT_EQ(0, grackle::impl::FrozenKeyIdxBiMap_size(&m)) + << "an empty mapping should have a size of 0"; + + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(&m, "key"), + EmptyAccessRslt()) + << "key lookup should always fail for an empty mapping"; + + EXPECT_EQ(nullptr, grackle::impl::FrozenKeyIdxBiMap_inverse_find(&m, 0)) + << "index lookup should always fail for an empty mapping"; + + grackle::impl::drop_FrozenKeyIdxBiMap(&m); +} + +// validate behavior of clone for an empty bimap +TEST(FrozenKeyIdxBiMap, EmptyClone) { + // make the original + grackle::impl::FrozenKeyIdxBiMap m = grackle::impl::new_FrozenKeyIdxBiMap( + nullptr, 0, grackle::impl::BiMapMode::COPIES_KEYDATA); + ASSERT_TRUE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&m)) + << "construction of a FrozenKeyIdxBiMap unexpectedly failed"; + + // make the clone + grackle::impl::FrozenKeyIdxBiMap m_clone = + grackle::impl::FrozenKeyIdxBiMap_clone(&m); + + bool success = grackle::impl::FrozenKeyIdxBiMap_is_ok(&m_clone); + + grackle::impl::drop_FrozenKeyIdxBiMap(&m); // drop the original + + if (success) { + grackle::impl::drop_FrozenKeyIdxBiMap(&m_clone); + } else { + FAIL() << "cloning an empty mapping failed!"; + } +} + +class FrozenKeyIdxBiMapConstructorSuite + : public testing::TestWithParam { + // You can implement all the usual fixture class members here. + // To access the test parameter, call GetParam() from class + // TestWithParam. +}; + +TEST_P(FrozenKeyIdxBiMapConstructorSuite, Simple) { + const char* keys[] = {"denisty", "internal_energy"}; + + grackle::impl::FrozenKeyIdxBiMap tmp = + grackle::impl::new_FrozenKeyIdxBiMap(keys, 2, GetParam()); + + EXPECT_TRUE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&tmp)); + grackle::impl::drop_FrozenKeyIdxBiMap(&tmp); +} + +TEST_P(FrozenKeyIdxBiMapConstructorSuite, LongKey) { + const char* first_key = "density"; + std::string long_key(grackle::impl::bimap_detail::KEYLEN_MAX, 'A'); + const char* keys[2] = {first_key, long_key.data()}; + + grackle::impl::FrozenKeyIdxBiMap tmp = + grackle::impl::new_FrozenKeyIdxBiMap(keys, 2, GetParam()); + + ASSERT_TRUE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&tmp)); + grackle::impl::drop_FrozenKeyIdxBiMap(&tmp); +} + +TEST_P(FrozenKeyIdxBiMapConstructorSuite, TooLongKey) { + const char* first_key = "density"; + std::string long_key(grackle::impl::bimap_detail::KEYLEN_MAX + 1, 'A'); + const char* keys[2] = {first_key, long_key.data()}; + + grackle::impl::FrozenKeyIdxBiMap tmp = + grackle::impl::new_FrozenKeyIdxBiMap(keys, 2, GetParam()); + ASSERT_FALSE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&tmp)); +} + +TEST_P(FrozenKeyIdxBiMapConstructorSuite, 0LenKey) { + const char* keys[2] = {"density", ""}; + grackle::impl::FrozenKeyIdxBiMap tmp = + grackle::impl::new_FrozenKeyIdxBiMap(keys, 2, GetParam()); + ASSERT_FALSE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&tmp)); +} + +TEST_P(FrozenKeyIdxBiMapConstructorSuite, NullptrWithPosCount) { + grackle::impl::FrozenKeyIdxBiMap tmp = + grackle::impl::new_FrozenKeyIdxBiMap(nullptr, 1, GetParam()); + ASSERT_FALSE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&tmp)); +} + +TEST_P(FrozenKeyIdxBiMapConstructorSuite, NotNull0KeyCount) { + const char* keys[] = {"denisty", "internal_energy"}; + grackle::impl::FrozenKeyIdxBiMap tmp = + grackle::impl::new_FrozenKeyIdxBiMap(keys, 0, GetParam()); + ASSERT_FALSE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&tmp)); +} + +INSTANTIATE_TEST_SUITE_P( + , // <- leaving Instantiation name empty + FrozenKeyIdxBiMapConstructorSuite, + testing::Values(grackle::impl::BiMapMode::REFS_KEYDATA, + grackle::impl::BiMapMode::COPIES_KEYDATA), + [](const testing::TestParamInfo< + FrozenKeyIdxBiMapConstructorSuite::ParamType>& info) { + if (info.param == grackle::impl::BiMapMode::REFS_KEYDATA) { + return std::string("BIMAP_REFS_KEYDATA"); + } else { + return std::string("BIMAP_COPIES_KEYDATA"); + } + }); + +/// helper function to initialize a map from a vector +grackle::impl::FrozenKeyIdxBiMap new_FrozenKeyIdxBiMap( + const std::vector& vec_, grackle::impl::BiMapMode mode) { + std::size_t key_count = vec_.size(); + + // create a vector of pointers + std::vector key_ptr_l(key_count, nullptr); + for (std::size_t i = 0; i < key_count; i++) { + key_ptr_l[i] = vec_[i].c_str(); + } + + return new_FrozenKeyIdxBiMap(key_ptr_l.data(), key_count, mode); +} + +class FrozenKeyIdxBiMapGeneralSuite + : public testing::TestWithParam { +protected: + std::vector ordered_keys; + grackle::impl::FrozenKeyIdxBiMap* bimap_p = nullptr; + + // we use SetUp/Teardown instead of constructor and destructor so we can + // perform some sanity checks with ASSERTIONs + void SetUp() override { + ordered_keys = + std::vector{"internal_energy", "density", "metal_density"}; + + grackle::impl::BiMapMode mode = GetParam(); + grackle::impl::FrozenKeyIdxBiMap tmp = + new_FrozenKeyIdxBiMap(ordered_keys, mode); + ASSERT_TRUE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&tmp)); + + bimap_p = new grackle::impl::FrozenKeyIdxBiMap; + (*bimap_p) = tmp; + } + + void TearDown() override { + if (bimap_p != nullptr) { + grackle::impl::drop_FrozenKeyIdxBiMap(bimap_p); + delete bimap_p; + } + } + + bool ReusesOriginalKeyPtrs(const grackle::impl::FrozenKeyIdxBiMap* p) const { + for (int i = 0; i < 3; i++) { + const char* orig_key_ptr = ordered_keys[i].c_str(); + if (grackle::impl::FrozenKeyIdxBiMap_inverse_find(p, i) != orig_key_ptr) { + return false; + } + } + return true; + } +}; + +TEST_P(FrozenKeyIdxBiMapGeneralSuite, IdxFromKeyContainedKey) { + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(bimap_p, "density"), + AccessRsltHolds(1)); + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(bimap_p, "internal_energy"), + AccessRsltHolds(0)); + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(bimap_p, "metal_density"), + AccessRsltHolds(2)); +} + +TEST_P(FrozenKeyIdxBiMapGeneralSuite, IdxFromKeyAbsentKey) { + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(bimap_p, "notAKey"), + EmptyAccessRslt()); +} + +TEST_P(FrozenKeyIdxBiMapGeneralSuite, IdxFromKeyAbsentIrregularKeys) { + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(bimap_p, ""), + EmptyAccessRslt()); + + std::string key(grackle::impl::bimap_detail::KEYLEN_MAX + 1, 'A'); + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(bimap_p, key.data()), + EmptyAccessRslt()); +} + +TEST_P(FrozenKeyIdxBiMapGeneralSuite, KeyFromIdxInvalidIdx) { + EXPECT_EQ(grackle::impl::FrozenKeyIdxBiMap_inverse_find(bimap_p, 3), nullptr); +} + +TEST_P(FrozenKeyIdxBiMapGeneralSuite, KeyFromIdxValidIdx) { + EXPECT_EQ( + std::string(grackle::impl::FrozenKeyIdxBiMap_inverse_find(bimap_p, 2)), + std::string("metal_density")); + EXPECT_EQ( + std::string(grackle::impl::FrozenKeyIdxBiMap_inverse_find(bimap_p, 1)), + std::string("density")); + EXPECT_EQ( + std::string(grackle::impl::FrozenKeyIdxBiMap_inverse_find(bimap_p, 0)), + std::string("internal_energy")); + + // check whether the bimap is using pointers to the keys used during init + if (GetParam() == grackle::impl::BiMapMode::REFS_KEYDATA) { + EXPECT_TRUE(ReusesOriginalKeyPtrs(bimap_p)); + } else { + EXPECT_FALSE(ReusesOriginalKeyPtrs(bimap_p)); + } +} + +TEST_P(FrozenKeyIdxBiMapGeneralSuite, Clone) { + grackle::impl::FrozenKeyIdxBiMap clone = + grackle::impl::FrozenKeyIdxBiMap_clone(bimap_p); + ASSERT_TRUE(grackle::impl::FrozenKeyIdxBiMap_is_ok(&clone)); + grackle::impl::FrozenKeyIdxBiMap* clone_p = &clone; + + // for the sake of robustly checking everything, we delete bimap_p + grackle::impl::drop_FrozenKeyIdxBiMap(bimap_p); + bimap_p = nullptr; + + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(clone_p, "internal_energy"), + AccessRsltHolds(0)); + EXPECT_THAT(grackle::impl::FrozenKeyIdxBiMap_find(clone_p, "notAKey"), + EmptyAccessRslt()); + + EXPECT_EQ(grackle::impl::FrozenKeyIdxBiMap_inverse_find(clone_p, 3), nullptr); + EXPECT_STREQ(grackle::impl::FrozenKeyIdxBiMap_inverse_find(clone_p, 1), + "density"); + + // check whether the clone is using pointers to the keys used during init + if (GetParam() == grackle::impl::BiMapMode::REFS_KEYDATA) { + EXPECT_TRUE(ReusesOriginalKeyPtrs(clone_p)); + } else { + EXPECT_FALSE(ReusesOriginalKeyPtrs(clone_p)); + } + + // finally, cleanup the clone + grackle::impl::drop_FrozenKeyIdxBiMap(clone_p); +} + +INSTANTIATE_TEST_SUITE_P( + , // <- leaving Instantiation name empty + FrozenKeyIdxBiMapGeneralSuite, + testing::Values(grackle::impl::BiMapMode::REFS_KEYDATA, + grackle::impl::BiMapMode::COPIES_KEYDATA), + [](const testing::TestParamInfo& + info) { + if (info.param == grackle::impl::BiMapMode::REFS_KEYDATA) { + return std::string("BIMAP_REFS_KEYDATA"); + } else { + return std::string("BIMAP_COPIES_KEYDATA"); + } + }); diff --git a/tests/unit/test_unit_hash.cpp b/tests/unit/test_unit_hash.cpp new file mode 100644 index 000000000..101347ad4 --- /dev/null +++ b/tests/unit/test_unit_hash.cpp @@ -0,0 +1,61 @@ +//===----------------------------------------------------------------------===// +// +// See the LICENSE file for license and copyright information +// SPDX-License-Identifier: NCSA AND BSD-3-Clause +// +//===----------------------------------------------------------------------===// +/// +/// @file +/// Check correctness of the hash function +/// +//===----------------------------------------------------------------------===// + +#include +#include "support/fnv1a_hash.hpp" +#include +#include + +namespace grackle::impl { +/// Teach GTest how to print HashRsltPack +/// @note it's important this is in the same namespace as HashRsltPack +void PrintTo(const HashRsltPack& pack, std::ostream* os) { + *os << "{success: " << pack.success << ", keylen: " << pack.keylen + << ", hash: 0x" << std::setfill('0') + << std::setw(8) // u32 has 8 hex digits + << std::hex << pack.hash << "}"; +} + +bool operator==(const HashRsltPack& a, const HashRsltPack& b) { + return a.success == b.success && a.keylen == b.keylen && a.hash == b.hash; +} + +} // namespace grackle::impl + +// the test answers primarily came from Appendix C of +// https://datatracker.ietf.org/doc/html/draft-eastlake-fnv-17 + +TEST(FNV1a, EmptyString) { + grackle::impl::HashRsltPack expected{true, 0, 0x811c9dc5ULL}; + ASSERT_EQ(grackle::impl::fnv1a_hash(""), expected); +} + +TEST(FNV1a, aString) { + grackle::impl::HashRsltPack expected{true, 1, 0xe40c292cULL}; + ASSERT_EQ(grackle::impl::fnv1a_hash("a"), expected); +} + +TEST(FNV1a, foobarString) { + grackle::impl::HashRsltPack expected{true, 6, 0xbf9cf968ULL}; + ASSERT_EQ(grackle::impl::fnv1a_hash("foobar"), expected); +} + +TEST(FNV1a, MaxSizeString) { + constexpr int MaxKeyLen = 6; // <- exactly matches the key's length + grackle::impl::HashRsltPack expected{true, 6, 0xbf9cf968ULL}; + ASSERT_EQ(grackle::impl::fnv1a_hash("foobar"), expected); +} + +TEST(FNV1a, TooLongString) { + constexpr int MaxKeyLen = 5; // <- shorter than the queried key + ASSERT_FALSE(grackle::impl::fnv1a_hash("foobar").success); +}