@@ -83,11 +83,15 @@ void rotate_three(value_type_ &a, value_type_ &b, value_type_ &c) noexcept {
8383
8484struct linear_gap_costs_t {
8585 error_cost_t open_or_extend = 1 ;
86+
87+ constexpr sz_size_t magnitude () const noexcept { return std::abs (open_or_extend); }
8688};
8789
8890struct affine_gap_costs_t {
8991 error_cost_t open = 1 ;
9092 error_cost_t extend = 1 ;
93+
94+ constexpr sz_size_t magnitude () const noexcept { return std::max (std::abs (open), std::abs (extend)); }
9195};
9296
9397template <typename gap_costs_type_>
@@ -109,7 +113,7 @@ struct uniform_substitution_costs_t {
109113
110114 constexpr error_cost_t operator ()(char a, char b) const noexcept { return a == b ? match : mismatch; }
111115 constexpr error_cost_t operator ()(sz_rune_t a, sz_rune_t b) const noexcept { return a == b ? match : mismatch; }
112- constexpr sz_size_t max_magnitude_change () const noexcept { return std::max (std::abs (match), std::abs (mismatch)); }
116+ constexpr sz_size_t magnitude () const noexcept { return std::max (std::abs (match), std::abs (mismatch)); }
113117};
114118
115119#pragma region - Algorithm Building Blocks
@@ -133,23 +137,22 @@ struct similarity_memory_requirements {
133137 size_t total = 0 ;
134138
135139 /* *
136- * @param[in] first_length The length of the first string in characters/codepoints.
137- * @param[in] second_length The length of the second string in characters/codepoints.
138- * @param[in] max_magnitude_change The absolute value of the maximum change in nearby cells.
140+ * @param[in] first_length,second_length The lengths of strings in characters/codepoints/runes.
141+ * @param[in] substitute_magnitude,gap_magnitude The absolute value of the maximum change in nearby cells.
139142 * @param[in] bytes_per_char The number of bytes per character, 4 for UTF-32, 1 for ASCII.
140143 * @param[in] register_width The alignment of the data in bytes, 4 for CUDA, 64 for AVX-512.
141144 *
142- * To understand the @p max_magnitude_change parameter, consider the following example:
145+ * To understand the @p magnitude() parameter, consider the following example:
143146 * - substitution costs ranging from -16 to +15
144147 * - gap costs equal to -10
145148 * In that case, the biggest change will be `abs(-16) = 16`, so the passed argument should be 16.
146149 * In case of default Levenshtein distance, the maximum change is 1, so the passed argument should be 1.
147150 */
148- constexpr similarity_memory_requirements ( //
149- size_t first_length, size_t second_length, //
150- sz_similarity_gaps_t gap_type, //
151- size_t max_magnitude_change, //
152- size_t bytes_per_char, //
151+ constexpr similarity_memory_requirements ( //
152+ size_t first_length, size_t second_length, //
153+ sz_similarity_gaps_t gap_type, //
154+ size_t substitute_magnitude, size_t gap_magnitude, //
155+ size_t bytes_per_char, //
153156 size_t register_width) noexcept {
154157
155158 // Each diagonal in the DP matrix is only by 1 longer than the shorter string.
@@ -159,7 +162,8 @@ struct similarity_memory_requirements {
159162
160163 // The amount of memory we need per diagonal, depends on the maximum number of the differences
161164 // between 2 strings and the maximum cost of each change.
162- size_t max_cell_value = (longer_length + 1 ) * max_magnitude_change;
165+ size_t magnitude = sz_max_of_two (substitute_magnitude, gap_magnitude);
166+ size_t max_cell_value = (longer_length + 1 ) * magnitude;
163167 if constexpr (!is_signed_k)
164168 this ->bytes_per_cell = //
165169 max_cell_value < 256 ? 1
@@ -1401,9 +1405,9 @@ struct levenshtein_distance {
14011405
14021406 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
14031407 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , false >;
1404- similarity_memory_requirements_t requirements ( //
1405- first.size (), second.size (), //
1406- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
1408+ similarity_memory_requirements_t requirements ( //
1409+ first.size (), second.size (), //
1410+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
14071411 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
14081412
14091413 // When dealing with very small inputs, we may want to use a simpler Wagner-Fischer algorithm.
@@ -1531,9 +1535,9 @@ struct levenshtein_distance_utf8 {
15311535
15321536 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
15331537 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , false >;
1534- similarity_memory_requirements_t requirements ( //
1535- first.size (), second.size (), //
1536- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
1538+ similarity_memory_requirements_t requirements ( //
1539+ first.size (), second.size (), //
1540+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
15371541 sizeof (sz_rune_t ), SZ_MAX_REGISTER_WIDTH);
15381542
15391543 span<sz_rune_t const > const first_utf32 {first_data_utf32, first_length_utf32};
@@ -1630,9 +1634,9 @@ struct needleman_wunsch_score {
16301634
16311635 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
16321636 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , true >;
1633- similarity_memory_requirements_t requirements ( //
1634- first.size (), second.size (), //
1635- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
1637+ similarity_memory_requirements_t requirements ( //
1638+ first.size (), second.size (), //
1639+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
16361640 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
16371641
16381642 // When dealing with very small inputs, we may want to use a simpler Wagner-Fischer algorithm.
@@ -1724,9 +1728,9 @@ struct smith_waterman_score {
17241728
17251729 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
17261730 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , true >;
1727- similarity_memory_requirements_t requirements ( //
1728- first.size (), second.size (), //
1729- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
1731+ similarity_memory_requirements_t requirements ( //
1732+ first.size (), second.size (), //
1733+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
17301734 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
17311735
17321736 // When dealing with very small inputs, we may want to use a simpler Wagner-Fischer algorithm.
@@ -1783,7 +1787,7 @@ status_t _score_in_parallel( //
17831787 core_per_input_type_ &&core_per_input, //
17841788 all_cores_per_input_type_ &&all_cores_per_input, //
17851789 first_strings_type_ const &first_strings, second_strings_type_ const &second_strings, results_type_ &&results,
1786- sz_size_t max_magnitude_change , cpu_specs_t specs = {}) noexcept {
1790+ size_t substitute_magnitude, size_t gap_magnitude , cpu_specs_t specs = {}) noexcept {
17871791
17881792 using score_t = score_type_;
17891793 constexpr bool score_is_signed_k = std::is_signed_v<score_t >;
@@ -1808,9 +1812,9 @@ status_t _score_in_parallel( //
18081812 auto const &second = second_strings[i];
18091813
18101814 // ! Longer strings will be handled separately
1811- similarity_memory_requirements_t requirements ( //
1812- first.size (), second.size (), //
1813- gap_type<gap_costs_t >(), max_magnitude_change , //
1815+ similarity_memory_requirements_t requirements ( //
1816+ first.size (), second.size (), //
1817+ gap_type<gap_costs_t >(), substitute_magnitude, gap_magnitude , //
18141818 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
18151819
18161820 if (requirements.total >= specs.l1_bytes ) continue ;
@@ -1824,9 +1828,9 @@ status_t _score_in_parallel( //
18241828 score_t result = 0 ;
18251829 auto const &first = first_strings[i];
18261830 auto const &second = second_strings[i];
1827- similarity_memory_requirements_t requirements ( //
1828- first.size (), second.size (), //
1829- gap_type<gap_costs_t >(), max_magnitude_change , //
1831+ similarity_memory_requirements_t requirements ( //
1832+ first.size (), second.size (), //
1833+ gap_type<gap_costs_t >(), substitute_magnitude, gap_magnitude , //
18301834 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
18311835
18321836 if (requirements.total < specs.l1_bytes ) continue ;
@@ -1901,10 +1905,11 @@ struct levenshtein_distances {
19011905 results_type_ &&results, cpu_specs_t const &specs = {}) const noexcept {
19021906
19031907 if constexpr (capability_k & sz_cap_parallel_k)
1904- return _score_in_parallel<sz_size_t >( //
1905- core_per_input_t {substituter_, gap_costs_, alloc_}, //
1906- all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
1907- first_strings, second_strings, std::forward<results_type_>(results), 1 , specs);
1908+ return _score_in_parallel<sz_size_t >( //
1909+ core_per_input_t {substituter_, gap_costs_, alloc_}, //
1910+ all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
1911+ first_strings, second_strings, std::forward<results_type_>(results), //
1912+ substituter_.magnitude (), gap_costs_.magnitude (), specs);
19081913 else
19091914 return _score_sequentially<sz_size_t >( //
19101915 all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
@@ -1945,10 +1950,11 @@ struct levenshtein_distances_utf8 {
19451950 results_type_ &&results, cpu_specs_t const &specs = {}) const noexcept {
19461951
19471952 if constexpr (capability_k & sz_cap_parallel_k)
1948- return _score_in_parallel<sz_size_t >( //
1949- core_per_input_t {substituter_, gap_costs_, alloc_}, //
1950- all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
1951- first_strings, second_strings, std::forward<results_type_>(results), 1 , specs);
1953+ return _score_in_parallel<sz_size_t >( //
1954+ core_per_input_t {substituter_, gap_costs_, alloc_}, //
1955+ all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
1956+ first_strings, second_strings, std::forward<results_type_>(results), //
1957+ substituter_.magnitude (), gap_costs_.magnitude (), specs);
19521958 else
19531959 return _score_sequentially<sz_size_t >( //
19541960 all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
@@ -1991,11 +1997,11 @@ struct needleman_wunsch_scores {
19911997 results_type_ &&results, cpu_specs_t const &specs = {}) const noexcept {
19921998
19931999 if constexpr (capability_k & sz_cap_parallel_k)
1994- return _score_in_parallel<sz_ssize_t >( //
1995- core_per_input_t {substituter_, gap_costs_, alloc_}, //
1996- all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
1997- first_strings, second_strings, std::forward<results_type_>(results),
1998- substituter_.max_magnitude_change (), specs);
2000+ return _score_in_parallel<sz_ssize_t >( //
2001+ core_per_input_t {substituter_, gap_costs_, alloc_}, //
2002+ all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
2003+ first_strings, second_strings, std::forward<results_type_>(results), //
2004+ substituter_.magnitude (), gap_costs_. magnitude (), specs);
19992005 else
20002006 return _score_sequentially<sz_ssize_t >( //
20012007 all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
@@ -2038,11 +2044,11 @@ struct smith_waterman_scores {
20382044 results_type_ &&results, cpu_specs_t const &specs = {}) const noexcept {
20392045
20402046 if constexpr (capability_k & sz_cap_parallel_k)
2041- return _score_in_parallel<sz_ssize_t >( //
2042- core_per_input_t {substituter_, gap_costs_, alloc_}, //
2043- all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
2044- first_strings, second_strings, std::forward<results_type_>(results),
2045- substituter_.max_magnitude_change (), specs);
2047+ return _score_in_parallel<sz_ssize_t >( //
2048+ core_per_input_t {substituter_, gap_costs_, alloc_}, //
2049+ all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
2050+ first_strings, second_strings, std::forward<results_type_>(results), //
2051+ substituter_.magnitude (), gap_costs_. magnitude (), specs);
20462052 else
20472053 return _score_sequentially<sz_ssize_t >( //
20482054 all_cores_per_input_t {substituter_, gap_costs_, alloc_}, //
@@ -2081,7 +2087,7 @@ struct error_costs_256x256_t {
20812087 return result;
20822088 }
20832089
2084- constexpr sz_size_t max_magnitude_change () const noexcept {
2090+ constexpr sz_size_t magnitude () const noexcept {
20852091 sz_size_t max_magnitude = 0 ;
20862092 for (int i = 0 ; i != 256 ; ++i)
20872093 for (int j = 0 ; j != 256 ; ++j) //
@@ -2170,7 +2176,7 @@ struct error_costs_26x26ascii_t {
21702176 return result;
21712177 }
21722178
2173- constexpr sz_size_t max_magnitude_change () const noexcept {
2179+ constexpr sz_size_t magnitude () const noexcept {
21742180 sz_size_t max_magnitude = 0 ;
21752181 for (int i = 0 ; i != 26 ; ++i)
21762182 for (int j = 0 ; j != 26 ; ++j) //
@@ -2728,9 +2734,9 @@ struct levenshtein_distance<char, linear_gap_costs_t, allocator_type_, capabilit
27282734
27292735 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
27302736 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , false >;
2731- similarity_memory_requirements_t requirements ( //
2732- first.size (), second.size (), //
2733- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
2737+ similarity_memory_requirements_t requirements ( //
2738+ first.size (), second.size (), //
2739+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
27342740 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
27352741
27362742 // When dealing with larger arrays, we need to differentiate kernel with different cost aggregation types.
@@ -2833,9 +2839,9 @@ struct levenshtein_distance_utf8<char, linear_gap_costs_t, allocator_type_, capa
28332839
28342840 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
28352841 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , false >;
2836- similarity_memory_requirements_t requirements ( //
2837- first.size (), second.size (), //
2838- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
2842+ similarity_memory_requirements_t requirements ( //
2843+ first.size (), second.size (), //
2844+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
28392845 sizeof (sz_rune_t ), SZ_MAX_REGISTER_WIDTH);
28402846
28412847 // When dealing with larger arrays, we need to differentiate kernel with different cost aggregation types.
@@ -3121,9 +3127,9 @@ struct needleman_wunsch_score<char, error_costs_256x256_t, linear_gap_costs_t, a
31213127
31223128 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
31233129 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , true >;
3124- similarity_memory_requirements_t requirements ( //
3125- first.size (), second.size (), //
3126- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
3130+ similarity_memory_requirements_t requirements ( //
3131+ first.size (), second.size (), //
3132+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
31273133 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
31283134
31293135 // When dealing with larger arrays, we need to differentiate kernel with different cost aggregation types.
@@ -3195,9 +3201,9 @@ struct smith_waterman_score<char, error_costs_256x256_t, linear_gap_costs_t, all
31953201
31963202 // Estimate the maximum dimension of the DP matrix and choose the best type for it.
31973203 using similarity_memory_requirements_t = similarity_memory_requirements<sz_size_t , true >;
3198- similarity_memory_requirements_t requirements ( //
3199- first.size (), second.size (), //
3200- gap_type<gap_costs_t >(), substituter_.max_magnitude_change (), //
3204+ similarity_memory_requirements_t requirements ( //
3205+ first.size (), second.size (), //
3206+ gap_type<gap_costs_t >(), substituter_.magnitude (), gap_costs_. magnitude (), //
32013207 sizeof (char_t ), SZ_MAX_REGISTER_WIDTH);
32023208
32033209 // When dealing with larger arrays, we need to differentiate kernel with different cost aggregation types.
0 commit comments