Skip to content

Commit 6f8cdb9

Browse files
committed
Add: OpenMP C++ draft
1 parent dd57536 commit 6f8cdb9

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed

include/stringzilla/similarity.hpp

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
/**
2+
* @brief OpenMP-accelerated string similarity utilities.
3+
* @file similarity.hpp
4+
* @author Ash Vardanian
5+
*
6+
* Includes core APIs:
7+
*
8+
* - `sz::openmp::levenshtein_distances` & `sz::openmp::levenshtein_distances_utf8` for Levenshtein edit-distances.
9+
* - `sz::openmp::needleman_wunsch_score` for weighted Needleman-Wunsch global alignment.
10+
*
11+
* Those are mostly providing specialized overloads of the @b `sz::openmp::global_score_kernel` template.
12+
*
13+
*/
14+
#ifndef STRINGZILLA_SIMILARITY_HPP_
15+
#define STRINGZILLA_SIMILARITY_HPP_
16+
17+
#include "types.h"
18+
19+
namespace ashvardanian {
20+
namespace stringzilla {
21+
namespace openmp {
22+
23+
struct uniform_substitution_cost_t {
24+
sz_error_cost_t operator()(char a, char b) const { return a == b ? 0 : 1; }
25+
};
26+
27+
/**
28+
* @brief Levenshtein edit-distance computation using skewed diagonal order evaluation on CPU.
29+
* Type-agnostic, unlike `sz_levenshtein_distance`, and annotated for OpenMP parallelization.
30+
*
31+
* @param[in] first The first string.
32+
* @param[in] first_length The length of the first string.
33+
* @param[in] second The second string.
34+
* @param[in] second_length The length of the second string.
35+
*
36+
* There are smarter algorithms for computing the Levenshtein distance, but this one is extremely flexible.
37+
*/
38+
template < //
39+
typename char_type_ = char, //
40+
typename distance_type_ = sz_size_t, //
41+
typename get_substitution_cost_ = uniform_substitution_cost_t, //
42+
sz_error_cost_t gap_cost_ = 1 //
43+
>
44+
sz_status_t global_score_kernel( //
45+
char_type_ const *shorter, sz_size_t shorter_length, //
46+
char_type_ const *longer, sz_size_t longer_length, //
47+
get_substitution_cost_ &&get_substitution_cost, //
48+
sz_memory_allocator_t *alloc, //
49+
distance_type_ *result_ptr) {
50+
51+
// We are going to store 3 diagonals of the matrix, assuming each would fit into a single ZMM register.
52+
// The length of the longest (main) diagonal would be `shorter_dim = (shorter_length + 1)`.
53+
sz_size_t const shorter_dim = shorter_length + 1;
54+
sz_size_t const longer_dim = longer_length + 1;
55+
56+
// Let's say we are dealing with 3 and 5 letter words.
57+
// The matrix will have size 4 x 6, parameterized as (shorter_dim x longer_dim).
58+
// It will have:
59+
// - 4 diagonals of increasing length, at positions: 0, 1, 2, 3.
60+
// - 2 diagonals of fixed length, at positions: 4, 5.
61+
// - 3 diagonals of decreasing length, at positions: 6, 7, 8.
62+
sz_size_t const diagonals_count = shorter_dim + longer_dim - 1;
63+
sz_size_t const max_diagonal_length = shorter_length + 1;
64+
65+
// We want to avoid reverse-order iteration over the shorter string.
66+
// Let's allocate a bit more memory and reverse-export our shorter string into that buffer.
67+
sz_size_t const buffer_length =
68+
sizeof(distance_type_) * max_diagonal_length * 3 + shorter_length * sizeof(char_type_);
69+
distance_type_ *const distances = (distance_type_ *)alloc->allocate(buffer_length, alloc->handle);
70+
if (!distances) return sz_bad_alloc_k;
71+
72+
// The next few pointers will be swapped around.
73+
distance_type_ *previous_distances = distances;
74+
distance_type_ *current_distances = previous_distances + longer_dim;
75+
distance_type_ *next_distances = current_distances + longer_dim;
76+
char_type_ const *const shorter_reversed = (char_type_ const *)(next_distances + longer_dim);
77+
78+
// Export the reversed string into the buffer.
79+
for (sz_size_t i = 0; i != shorter_length; ++i) shorter_reversed[i] = shorter[shorter_length - 1 - i];
80+
81+
// Initialize the first two diagonals:
82+
previous_distances[0] = 0;
83+
current_distances[0] = current_distances[1] = 1;
84+
85+
// We skip diagonals 0 and 1, as they are trivial.
86+
// We will start with diagonal 2, which has length 3, with the first and last elements being preset,
87+
// so we are effectively computing just one value, as will be marked by a single set bit in
88+
// the `next_diagonal_mask` on the very first iteration.
89+
sz_size_t next_diagonal_index = 2;
90+
91+
// Progress through the upper triangle of the Levenshtein matrix.
92+
for (; next_diagonal_index != shorter_dim; ++next_diagonal_index) {
93+
sz_size_t const next_diagonal_length = next_diagonal_index + 1;
94+
#pragma omp simd
95+
for (sz_size_t offset_in_diagonal = 1; offset_in_diagonal + 1 < next_diagonal_length; ++offset_in_diagonal) {
96+
// ? Note that here we are still traversing both buffers in the same order,
97+
// ? because the shorter string has been reversed into `shorter_reversed`.
98+
char_type_ shorter_char = shorter_reversed[shorter_length - next_diagonal_index + offset_in_diagonal - 1];
99+
char_type_ longer_char = longer[offset_in_diagonal];
100+
sz_error_cost_t cost_of_substitution = get_substitution_cost(shorter_char, longer_char);
101+
distance_type_ cost_if_substitution = previous_distances[offset_in_diagonal] + cost_of_substitution;
102+
distance_type_ cost_if_deletion_or_insertion =
103+
gap_cost_ +
104+
sz_min_of_two(current_distances[offset_in_diagonal - 1], current_distances[offset_in_diagonal]);
105+
next_distances[offset_in_diagonal] = sz_min_of_two(cost_if_deletion_or_insertion, cost_if_substitution);
106+
}
107+
// Don't forget to populate the first row and the first column of the Levenshtein matrix.
108+
next_distances[0] = next_distances[next_diagonal_length - 1] = next_diagonal_index;
109+
// Perform a circular rotation of those buffers, to reuse the memory.
110+
distance_type_ *temporary = previous_distances;
111+
previous_distances = current_distances;
112+
current_distances = next_distances;
113+
next_distances = temporary;
114+
}
115+
116+
// Now let's handle the anti-diagonal band of the matrix, between the top and bottom triangles.
117+
for (; next_diagonal_index != longer_dim; ++next_diagonal_index) {
118+
sz_size_t const next_diagonal_length = shorter_dim;
119+
for (sz_size_t offset_in_diagonal = 1; offset_in_diagonal + 1 < next_diagonal_length; ++offset_in_diagonal) {
120+
char_type_ shorter_char = shorter[shorter_length - 1 - i]; // ! Walks in reverse order.
121+
char_type_ longer_char = longer[next_diagonal_index - n + i];
122+
sz_error_cost_t cost_of_substitution = get_substitution_cost(shorter_char, longer_char);
123+
distance_type_ cost_if_substitution = previous_distances[i] + cost_of_substitution;
124+
distance_type_ cost_if_deletion_or_insertion =
125+
sz_min_of_two(current_distances[i], current_distances[i + 1]) + gap_cost_;
126+
next_distances[i] = sz_min_of_two(cost_if_deletion_or_insertion, cost_if_substitution);
127+
}
128+
// Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
129+
// dropping the first element in the current array.
130+
distance_type_ *temporary = previous_distances;
131+
previous_distances = current_distances + 1;
132+
current_distances = next_distances;
133+
next_distances = temporary;
134+
}
135+
136+
// Cache scalar before `free` call.
137+
distance_type_ result = current_distances[0];
138+
alloc->free(distances, buffer_length, alloc->handle);
139+
*result_ptr = result;
140+
return sz_success_k;
141+
}
142+
143+
} // namespace openmp
144+
} // namespace stringzilla
145+
} // namespace ashvardanian
146+
147+
#endif // STRINGZILLA_SIMILARITY_HPP_

0 commit comments

Comments
 (0)