Skip to content

Commit 1879aeb

Browse files
committed
Add: Needleman-Wunsch (NW) benchmarks on GPU
1 parent 9d86d4c commit 1879aeb

File tree

3 files changed

+53
-4
lines changed

3 files changed

+53
-4
lines changed

scripts/bench_similarity.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ int main(int argc, char const **argv) {
6464

6565
std::printf("Starting string similarity benchmarks...\n");
6666
bench_levenshtein(env);
67+
bench_needleman_wunsch(env);
6768
}
6869
catch (std::exception const &e) {
6970
std::fprintf(stderr, "Failed with: %s\n", e.what());

scripts/bench_similarity.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ int main(int argc, char const **argv) {
6464

6565
std::printf("Starting string similarity benchmarks...\n");
6666
bench_levenshtein(env);
67+
bench_needleman_wunsch(env);
6768
}
6869
catch (std::exception const &e) {
6970
std::fprintf(stderr, "Failed with: %s\n", e.what());

scripts/bench_similarity.cuh

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ struct similarities_callable {
4040
if (env.tokens.size() <= batch_size) throw std::runtime_error("Batch size is too large.");
4141
}
4242

43-
inline call_result_t operator()(std::size_t batch_index) noexcept(false) {
43+
call_result_t operator()(std::size_t batch_index) noexcept(false) {
4444
std::size_t const batch_size = results.size();
4545
std::size_t const forward_token_index = (batch_index * batch_size) % (env.tokens.size() - batch_size);
4646
std::size_t const backward_token_index = env.tokens.size() - forward_token_index - batch_size;
@@ -49,10 +49,11 @@ struct similarities_callable {
4949
{env.tokens.data() + backward_token_index, batch_size});
5050
}
5151

52-
inline call_result_t operator()(std::span<token_view_t const> a, std::span<token_view_t const> b) noexcept(false) {
52+
call_result_t operator()(std::span<token_view_t const> a, std::span<token_view_t const> b) noexcept(false) {
5353
// Unpack the extra arguments from `std::tuple` into the engine call using `std::apply`
5454
sz::status_t status =
5555
std::apply([&](auto &&...rest) { return engine(a, b, results.data(), rest...); }, extra_args);
56+
do_not_optimize(status);
5657

5758
if (status != sz::status_t::success_k) throw std::runtime_error("Failed to compute Levenshtein distance.");
5859
do_not_optimize(results);
@@ -71,7 +72,7 @@ struct similarities_callable {
7172
};
7273

7374
struct similarities_equality_t {
74-
bool operator()(check_value_t const &a, check_value_t const &b) const {
75+
bool operator()(check_value_t const &a, check_value_t const &b) const noexcept {
7576
similarities_t const &a_ = *reinterpret_cast<similarities_t const *>(a);
7677
similarities_t const &b_ = *reinterpret_cast<similarities_t const *>(b);
7778
if (a_.size() != b_.size()) return false;
@@ -133,10 +134,56 @@ void bench_levenshtein(environment_t const &env) {
133134
bench_result_t utf8_baseline = bench_unary(env, name_utf8_baseline, call_utf8_baseline).log();
134135

135136
#if SZ_USE_ICE
136-
bench_unary(env, "levenshtein_utf8_ice:batch"s + std::to_string(batch_size), call_baseline,
137+
bench_unary(env, "levenshtein_utf8_ice:batch"s + std::to_string(batch_size), call_utf8_baseline,
137138
similarities_callable<levenshtein_utf8_ice_t>(env, results_accelerated, batch_size),
138139
callable_no_op_t {}, // preprocessing
139140
similarities_equality_t {}) // equality check
141+
.log(utf8_baseline);
142+
scramble_accelerated_results();
143+
#endif
144+
}
145+
}
146+
147+
void bench_needleman_wunsch(environment_t const &env) {
148+
149+
using namespace std::string_literals; // for "s" suffix
150+
151+
#if SZ_USE_CUDA
152+
sz::gpu_specs_t specs = *sz::gpu_specs();
153+
#endif
154+
std::vector<std::size_t> batch_sizes = {1024 / 32, 1024, 1024 * 32};
155+
#if SZ_DEBUG
156+
batch_sizes = {1, 2, 32};
157+
#endif
158+
similarities_t results_baseline, results_accelerated;
159+
160+
auto scramble_accelerated_results = [&] {
161+
std::shuffle(results_accelerated.begin(), results_accelerated.end(), global_random_generator());
162+
};
163+
164+
for (std::size_t batch_size : batch_sizes) {
165+
results_baseline.resize(batch_size);
166+
results_accelerated.resize(batch_size);
167+
168+
auto call_baseline = similarities_callable<needleman_wunsch_serial_t>(env, results_baseline, batch_size);
169+
auto name_baseline = "needleman_wunsch_serial:batch"s + std::to_string(batch_size);
170+
bench_result_t baseline = bench_unary(env, name_baseline, call_baseline).log();
171+
172+
#if SZ_USE_ICE
173+
bench_unary(env, "needleman_wunsch_ice:batch"s + std::to_string(batch_size), call_baseline,
174+
similarities_callable<needleman_wunsch_ice_t>(env, results_accelerated, batch_size),
175+
callable_no_op_t {}, // preprocessing
176+
similarities_equality_t {}) // equality check
177+
.log(baseline);
178+
scramble_accelerated_results();
179+
#endif
180+
181+
#if SZ_USE_CUDA
182+
bench_unary(env, "needleman_wunsch_cuda:batch"s + std::to_string(batch_size), call_baseline,
183+
similarities_callable<needleman_wunsch_cuda_t, sz::gpu_specs_t>(env, results_accelerated,
184+
batch_size, specs),
185+
callable_no_op_t {}, // preprocessing
186+
similarities_equality_t {}) // equality check
140187
.log(baseline);
141188
scramble_accelerated_results();
142189
#endif

0 commit comments

Comments
 (0)