Skip to content

Commit 427d5b5

Browse files
committed
Add: OpenMP score_diagonally
1 parent b55c696 commit 427d5b5

File tree

1 file changed

+54
-18
lines changed

1 file changed

+54
-18
lines changed

include/stringzilla/similarity.hpp

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
* - `sz::openmp::levenshtein_distances` & `sz::openmp::levenshtein_distances_utf8` for Levenshtein edit-distances.
99
* - `sz::openmp::needleman_wunsch_score` for weighted Needleman-Wunsch global alignment.
1010
*
11-
* Those are mostly providing specialized overloads of the @b `sz::openmp::global_score_kernel` template.
12-
*
11+
* Those are mostly providing specialized overloads of the @b `sz::openmp::score_diagonally` template.
1312
*/
1413
#ifndef STRINGZILLA_SIMILARITY_HPP_
1514
#define STRINGZILLA_SIMILARITY_HPP_
@@ -25,23 +24,26 @@ struct uniform_substitution_cost_t {
2524
};
2625

2726
/**
28-
* @brief Levenshtein edit-distance computation using skewed diagonal order evaluation on CPU.
29-
* Type-agnostic, unlike `sz_levenshtein_distance`, and annotated for OpenMP parallelization.
27+
* @brief Alignment Score and Edit Distance algorithm evaluating the Dynamic Programming matrix
28+
* @b three skewed (reverse) diagonals at a time on a CPU, leveraging OpenMP for parallelization.
29+
* @sa sz_levenshtein_distance, sz_levenshtein_distance_utf8, sz_needleman_wunsch_score
3030
*
3131
* @param[in] first The first string.
3232
* @param[in] first_length The length of the first string.
3333
* @param[in] second The second string.
3434
* @param[in] second_length The length of the second string.
3535
*
36-
* There are smarter algorithms for computing the Levenshtein distance, but this one is extremely flexible.
36+
* There are smarter algorithms for computing the Levenshtein distance, mostly based on bit-level operations.
37+
* Those, however, don't generalize well to arbitrary length inputs or non-uniform substitution costs.
38+
* This algorithm provides a more flexible baseline implementation for future SIMD and GPGPU optimizations.
3739
*/
3840
template < //
3941
typename char_type_ = char, //
4042
typename distance_type_ = sz_size_t, //
4143
typename get_substitution_cost_ = uniform_substitution_cost_t, //
4244
sz_error_cost_t gap_cost_ = 1 //
4345
>
44-
sz_status_t global_score_kernel( //
46+
sz_status_t score_diagonally( //
4547
char_type_ const *shorter, sz_size_t shorter_length, //
4648
char_type_ const *longer, sz_size_t longer_length, //
4749
get_substitution_cost_ &&get_substitution_cost, //
@@ -95,13 +97,16 @@ sz_status_t global_score_kernel( //
9597
for (sz_size_t offset_in_diagonal = 1; offset_in_diagonal + 1 < next_diagonal_length; ++offset_in_diagonal) {
9698
// ? Note that here we are still traversing both buffers in the same order,
9799
// ? because the shorter string has been reversed into `shorter_reversed`.
98-
char_type_ shorter_char = shorter_reversed[shorter_length - next_diagonal_index + offset_in_diagonal - 1];
99-
char_type_ longer_char = longer[offset_in_diagonal];
100+
char_type_ shorter_char = shorter_reversed[shorter_length - next_diagonal_index + offset_within_diagonal];
101+
char_type_ longer_char = longer[offset_in_diagonal - 1];
100102
sz_error_cost_t cost_of_substitution = get_substitution_cost(shorter_char, longer_char);
101103
distance_type_ cost_if_substitution = previous_distances[offset_in_diagonal] + cost_of_substitution;
102-
distance_type_ cost_if_deletion_or_insertion =
103-
gap_cost_ +
104-
sz_min_of_two(current_distances[offset_in_diagonal - 1], current_distances[offset_in_diagonal]);
104+
distance_type_ cost_if_deletion_or_insertion = //
105+
sz_min_of_two( //
106+
current_distances[offset_in_diagonal - 1], //
107+
current_distances[offset_in_diagonal] //
108+
) +
109+
gap_cost_;
105110
next_distances[offset_in_diagonal] = sz_min_of_two(cost_if_deletion_or_insertion, cost_if_substitution);
106111
}
107112
// Don't forget to populate the first row and the first column of the Levenshtein matrix.
@@ -116,19 +121,50 @@ sz_status_t global_score_kernel( //
116121
// Now let's handle the anti-diagonal band of the matrix, between the top and bottom triangles.
117122
for (; next_diagonal_index != longer_dim; ++next_diagonal_index) {
118123
sz_size_t const next_diagonal_length = shorter_dim;
119-
for (sz_size_t offset_in_diagonal = 1; offset_in_diagonal + 1 < next_diagonal_length; ++offset_in_diagonal) {
120-
char_type_ shorter_char = shorter[shorter_length - 1 - i]; // ! Walks in reverse order.
121-
char_type_ longer_char = longer[next_diagonal_index - n + i];
124+
#pragma omp simd
125+
for (sz_size_t offset_in_diagonal = 0; offset_in_diagonal + 1 < next_diagonal_length; ++offset_in_diagonal) {
126+
char_type_ shorter_char = shorter_reversed[shorter_length - shorter_dim + offset_in_diagonal + 1];
127+
char_type_ longer_char = longer[next_diagonal_index - shorter_dim + offset_in_diagonal];
122128
sz_error_cost_t cost_of_substitution = get_substitution_cost(shorter_char, longer_char);
123-
distance_type_ cost_if_substitution = previous_distances[i] + cost_of_substitution;
124-
distance_type_ cost_if_deletion_or_insertion =
125-
sz_min_of_two(current_distances[i], current_distances[i + 1]) + gap_cost_;
129+
distance_type_ cost_if_substitution = previous_distances[offset_in_diagonal] + cost_of_substitution;
130+
distance_type_ cost_if_deletion_or_insertion = //
131+
sz_min_of_two( //
132+
current_distances[offset_in_diagonal], //
133+
current_distances[offset_in_diagonal + 1] //
134+
) +
135+
gap_cost_;
136+
next_distances[i] = sz_min_of_two(cost_if_deletion_or_insertion, cost_if_substitution);
137+
}
138+
next_distances[next_diagonal_length - 1] = next_diagonal_index;
139+
// Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
140+
// dropping the first element in the current array.
141+
distance_type_ *temporary = previous_distances;
142+
previous_distances = current_distances + 1; // ! Note how we shift forward here
143+
current_distances = next_distances;
144+
next_distances = temporary;
145+
}
146+
147+
// Now let's handle the bottom-right triangle of the matrix.
148+
for (; next_diagonal_index != longer_dim; ++next_diagonal_index) {
149+
sz_size_t const next_diagonal_length = diagonals_count - next_diagonal_index;
150+
#pragma omp simd
151+
for (sz_size_t offset_in_diagonal = 0; offset_in_diagonal < next_diagonal_length; ++offset_in_diagonal) {
152+
char_type_ shorter_char = shorter_reversed[shorter_length - shorter_dim + offset_in_diagonal + 1];
153+
char_type_ longer_char = longer[next_diagonal_index - shorter_dim + offset_in_diagonal];
154+
sz_error_cost_t cost_of_substitution = get_substitution_cost(shorter_char, longer_char);
155+
distance_type_ cost_if_substitution = previous_distances[offset_in_diagonal] + cost_of_substitution;
156+
distance_type_ cost_if_deletion_or_insertion = //
157+
sz_min_of_two( //
158+
current_distances[offset_in_diagonal], //
159+
current_distances[offset_in_diagonal + 1] //
160+
) +
161+
gap_cost_;
126162
next_distances[i] = sz_min_of_two(cost_if_deletion_or_insertion, cost_if_substitution);
127163
}
128164
// Perform a circular rotation of those buffers, to reuse the memory, this time, with a shift,
129165
// dropping the first element in the current array.
130166
distance_type_ *temporary = previous_distances;
131-
previous_distances = current_distances + 1;
167+
previous_distances = current_distances + 1; // ! Note how we shift forward here
132168
current_distances = next_distances;
133169
next_distances = temporary;
134170
}

0 commit comments

Comments
 (0)