@@ -50,6 +50,13 @@ sz_status_t score_diagonally( //
5050 sz_memory_allocator_t *alloc, //
5151 distance_type_ *result_ptr) {
5252
53+ // Simplify usage in higher-level libraries, where wrapping custom allocators may be troublesome.
54+ sz_memory_allocator_t global_alloc;
55+ if (!alloc) {
56+ sz_memory_allocator_init_default (&global_alloc);
57+ alloc = &global_alloc;
58+ }
59+
5360 // We are going to store 3 diagonals of the matrix, assuming each would fit into a single ZMM register.
5461 // The length of the longest (main) diagonal would be `shorter_dim = (shorter_length + 1)`.
5562 sz_size_t const shorter_dim = shorter_length + 1 ;
@@ -75,7 +82,7 @@ sz_status_t score_diagonally( //
7582 distance_type_ *previous_distances = distances;
7683 distance_type_ *current_distances = previous_distances + longer_dim;
7784 distance_type_ *next_distances = current_distances + longer_dim;
78- char_type_ const *const shorter_reversed = (char_type_ const *)(next_distances + longer_dim);
85+ char_type_ *const shorter_reversed = (char_type_ *)(next_distances + longer_dim);
7986
8087 // Export the reversed string into the buffer.
8188 for (sz_size_t i = 0 ; i != shorter_length; ++i) shorter_reversed[i] = shorter[shorter_length - 1 - i];
@@ -91,13 +98,13 @@ sz_status_t score_diagonally( //
9198 sz_size_t next_diagonal_index = 2 ;
9299
93100 // Progress through the upper triangle of the Levenshtein matrix.
94- for (; next_diagonal_index != shorter_dim; ++next_diagonal_index) {
101+ for (; next_diagonal_index < shorter_dim; ++next_diagonal_index) {
95102 sz_size_t const next_diagonal_length = next_diagonal_index + 1 ;
96103#pragma omp simd
97104 for (sz_size_t offset_in_diagonal = 1 ; offset_in_diagonal + 1 < next_diagonal_length; ++offset_in_diagonal) {
98105 // ? Note that here we are still traversing both buffers in the same order,
99106 // ? because the shorter string has been reversed into `shorter_reversed`.
100- char_type_ shorter_char = shorter_reversed[shorter_length - next_diagonal_index + offset_within_diagonal ];
107+ char_type_ shorter_char = shorter_reversed[shorter_length - next_diagonal_index + offset_in_diagonal ];
101108 char_type_ longer_char = longer[offset_in_diagonal - 1 ];
102109 sz_error_cost_t cost_of_substitution = get_substitution_cost (shorter_char, longer_char);
103110 distance_type_ cost_if_substitution = previous_distances[offset_in_diagonal] + cost_of_substitution;
@@ -119,7 +126,7 @@ sz_status_t score_diagonally( //
119126 }
120127
121128 // Now let's handle the anti-diagonal band of the matrix, between the top and bottom triangles.
122- for (; next_diagonal_index != longer_dim; ++next_diagonal_index) {
129+ for (; next_diagonal_index < longer_dim; ++next_diagonal_index) {
123130 sz_size_t const next_diagonal_length = shorter_dim;
124131#pragma omp simd
125132 for (sz_size_t offset_in_diagonal = 0 ; offset_in_diagonal + 1 < next_diagonal_length; ++offset_in_diagonal) {
@@ -133,7 +140,7 @@ sz_status_t score_diagonally( //
133140 current_distances[offset_in_diagonal + 1 ] //
134141 ) +
135142 gap_cost_;
136- next_distances[i ] = sz_min_of_two (cost_if_deletion_or_insertion, cost_if_substitution);
143+ next_distances[offset_in_diagonal ] = sz_min_of_two (cost_if_deletion_or_insertion, cost_if_substitution);
137144 }
138145 next_distances[next_diagonal_length - 1 ] = next_diagonal_index;
139146 // Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
@@ -145,7 +152,7 @@ sz_status_t score_diagonally( //
145152 }
146153
147154 // Now let's handle the bottom-right triangle of the matrix.
148- for (; next_diagonal_index != longer_dim ; ++next_diagonal_index) {
155+ for (; next_diagonal_index < diagonals_count ; ++next_diagonal_index) {
149156 sz_size_t const next_diagonal_length = diagonals_count - next_diagonal_index;
150157#pragma omp simd
151158 for (sz_size_t offset_in_diagonal = 0 ; offset_in_diagonal < next_diagonal_length; ++offset_in_diagonal) {
@@ -159,7 +166,7 @@ sz_status_t score_diagonally( //
159166 current_distances[offset_in_diagonal + 1 ] //
160167 ) +
161168 gap_cost_;
162- next_distances[i ] = sz_min_of_two (cost_if_deletion_or_insertion, cost_if_substitution);
169+ next_distances[offset_in_diagonal ] = sz_min_of_two (cost_if_deletion_or_insertion, cost_if_substitution);
163170 }
164171 // Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
165172 // dropping the first element in the current array.
@@ -176,6 +183,57 @@ sz_status_t score_diagonally( //
176183 return sz_success_k;
177184}
178185
186+ template <typename first_string_type_, typename second_string_type_>
187+ inline std::size_t levenshtein_distance ( //
188+ first_string_type_ const &first, second_string_type_ const &second) {
189+
190+ std::size_t const first_length = first.length ();
191+ std::size_t const second_length = second.length ();
192+ if (first_length == 0 ) return second_length;
193+ if (second_length == 0 ) return first_length;
194+
195+ sz_size_t max_length = sz_max_of_two (first_length, second_length);
196+ if (max_length < 256u ) {
197+
198+ sz_u8_t result_u8;
199+ sz_status_t status = score_diagonally<char , sz_u8_t , uniform_substitution_cost_t , 1 >(
200+ first.data (), first_length, second.data (), second_length, {}, NULL , &result_u8);
201+ sz_unused (status);
202+ return result_u8;
203+ }
204+ else if (max_length < 65536u ) {
205+ sz_u16_t result_u16;
206+ sz_status_t status = score_diagonally<char , sz_u16_t , uniform_substitution_cost_t , 1 >(
207+ first.data (), first_length, second.data (), second_length, {}, NULL , &result_u16);
208+ sz_unused (status);
209+ return result_u16;
210+ }
211+ else {
212+ sz_size_t result_size_t ;
213+ sz_status_t status = score_diagonally<char , sz_size_t , uniform_substitution_cost_t , 1 >(
214+ first.data (), first_length, second.data (), second_length, {}, NULL , &result_size_t );
215+ sz_unused (status);
216+ return result_size_t ;
217+ }
218+ }
219+
220+ inline void levenshtein_distance_utf8 ( //
221+ sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length, //
222+ sz_size_t bound, sz_memory_allocator_t *alloc, sz_size_t *result) {
223+ sz_unused (a && b && a_length && b_length && alloc && result && bound);
224+ }
225+
226+ inline void needleman_wunsch_score ( //
227+ sz_cptr_t a, sz_size_t a_length, sz_cptr_t b, sz_size_t b_length, //
228+ sz_error_cost_t const *subs, sz_error_cost_t gap, //
229+ sz_memory_allocator_t *alloc, sz_ssize_t *result) {
230+ sz_unused (a && b && a_length && b_length && subs && alloc && result && gap);
231+ }
232+
233+ inline void levenshtein_distances () {}
234+ inline void levenshtein_distances_utf8 () {}
235+ inline void needleman_wunsch_scores () {}
236+
179237} // namespace openmp
180238} // namespace stringzilla
181239} // namespace ashvardanian
0 commit comments