|
| 1 | +// ----------------------------------------------------------------------------------------------------- |
| 2 | +// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin |
| 3 | +// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik |
| 4 | +// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License |
| 5 | +// shipped with this file and also available at: https://github.com/seqan/raptor/blob/master/LICENSE.md |
| 6 | +// ----------------------------------------------------------------------------------------------------- |
| 7 | + |
| 8 | +#include <benchmark/benchmark.h> |
| 9 | + |
| 10 | +#include <seqan3/alphabet/nucleotide/dna4.hpp> |
| 11 | +#include <seqan3/utility/views/slice.hpp> |
| 12 | + |
| 13 | +#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp> |
| 14 | +#include <seqan3/search/views/kmer_hash.hpp> |
| 15 | +#include <seqan3/test/performance/sequence_generator.hpp> |
| 16 | + |
// Problem dimensions of the benchmark. The values in the trailing comments are the
// larger alternatives noted in the original source (presumably full-scale settings).
static constexpr size_t genome_size = 5000;   // 4'300'000'000
static constexpr size_t read_size = 100;
static constexpr size_t read_count = 1000;    // 1'000'000
// Total size that is divided evenly among all bins: 8'388'608 bits = 1 MiB.
static constexpr size_t ibf_size = 8'388'608; // 34'359'738'368 (= 4 GiB)
| 21 | + |
| 22 | +static std::vector<seqan3::dna4> const genome{seqan3::test::generate_sequence<seqan3::dna4>(genome_size, 0, 0)}; |
| 23 | +static std::vector<std::vector<seqan3::dna4>> const reads{[] (auto const & genome) { |
| 24 | + std::vector<std::vector<seqan3::dna4>> result(read_count); |
| 25 | + size_t i{}; |
| 26 | + for (auto && read_start : seqan3::test::generate_numeric_sequence<size_t>(read_count, 0, genome_size - read_size + 1, 0)) |
| 27 | + { |
| 28 | + auto v = genome | seqan3::views::slice(read_start, read_start + read_size); |
| 29 | + result[i++].assign(v.begin(), v.end()); |
| 30 | + } |
| 31 | + return result; |
| 32 | + } (genome)}; |
| 33 | + |
| 34 | +static void search_benchmark(benchmark::State & state) |
| 35 | +{ |
| 36 | + size_t const bin_count = static_cast<size_t>(state.range(0)); |
| 37 | + size_t const hash_num{2u}; |
| 38 | + size_t const bin_size{ibf_size / bin_count}; |
| 39 | + size_t const chunk_size{(genome_size + bin_count - 1) / bin_count}; |
| 40 | + |
| 41 | + seqan3::interleaved_bloom_filter<seqan3::data_layout::uncompressed> ibf{seqan3::bin_count{bin_count}, |
| 42 | + seqan3::bin_size{bin_size}, |
| 43 | + seqan3::hash_function_count{hash_num}}; |
| 44 | + |
| 45 | + size_t bin_counter{}; |
| 46 | + for (auto && sequence : genome | seqan3::views::chunk(chunk_size)) |
| 47 | + for (auto && hash : sequence | seqan3::views::kmer_hash(seqan3::ungapped{19u})) |
| 48 | + ibf.emplace(hash, seqan3::bin_index{bin_counter++}); |
| 49 | + |
| 50 | + auto agent = ibf.counting_agent<uint16_t>(); |
| 51 | + for (auto _ : state) |
| 52 | + for (auto && query : reads) |
| 53 | + benchmark::DoNotOptimize(agent.bulk_count(query | seqan3::views::kmer_hash(seqan3::ungapped{19u}))); |
| 54 | +} |
| 55 | + |
| 56 | +BENCHMARK(search_benchmark)->RangeMultiplier(2)->Range(64, 65536); |
| 57 | + |
| 58 | +BENCHMARK_MAIN(); |
0 commit comments