# Patch overview (part 1): switches the usage comments in .clang-format from clang-format-19 to clang-format-20,
# adds headers declaring utility::store / utility::load overloads for fmc::BiFMIndex<5>, seqan::hibf::interleaved_bloom_filter,
# meta and the reference container (ibf.hpp, meta.hpp, reference.hpp under include/fpgalign/utility/), and registers
# utility/ibf.cpp, utility/fmindex.cpp, utility/meta.cpp and utility/reference.cpp in src/CMakeLists.txt.
# NOTE(review): #include targets, template arguments and the file header of the first new header look stripped in this copy — confirm against the original patch.
diff --git a/.clang-format b/.clang-format index 891f5a3..629fb07 100644 --- a/.clang-format +++ b/.clang-format @@ -2,8 +2,8 @@ # SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik # SPDX-License-Identifier: CC0-1.0 -# Format all files: find . -iname "*.[ch]pp" -not -path "./build/*" | xargs clang-format-19 --style=file -i -# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-19 --style=file -i +# Format all files: find . -iname "*.[ch]pp" -not -path "./build/*" | xargs clang-format-20 --style=file -i +# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-20 --style=file -i --- Language: Cpp AccessModifierOffset: -4 @@ -128,8 +128,10 @@ IncludeCategories: Priority: 6 - Regex: ' + +#include + +namespace utility +{ + +void store(fmc::BiFMIndex<5> const & index, config const & config, size_t const id); + +void load(fmc::BiFMIndex<5> & index, config const & config, size_t const id); + +} // namespace utility diff --git a/include/fpgalign/utility/ibf.hpp b/include/fpgalign/utility/ibf.hpp new file mode 100644 index 0000000..144a1ef --- /dev/null +++ b/include/fpgalign/utility/ibf.hpp @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include + +#include + +namespace utility +{ + +void store(seqan::hibf::interleaved_bloom_filter const & ibf, config const & config); + +void load(seqan::hibf::interleaved_bloom_filter & ibf, config const & config); + +} // namespace utility diff --git a/include/fpgalign/utility/meta.hpp b/include/fpgalign/utility/meta.hpp new file mode 100644 index 0000000..010af12 --- /dev/null +++ b/include/fpgalign/utility/meta.hpp @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +// 
SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include +#include + +namespace utility +{ + +void store(meta const & meta, config const & config); + +void load(meta & meta, config const & config); + +} // namespace utility diff --git a/include/fpgalign/utility/reference.hpp b/include/fpgalign/utility/reference.hpp new file mode 100644 index 0000000..2dd9c5f --- /dev/null +++ b/include/fpgalign/utility/reference.hpp @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#pragma once + +#include +#include +#include + +#include + +namespace utility +{ + +void store(std::vector> const & reference, config const & config, size_t const id); + +void load(std::vector> & reference, config const & config, size_t const id); + +} // namespace utility diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dd6f896..10f54e9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,6 +14,10 @@ set (FPGAlign_SOURCE_FILES search/fmindex.cpp search/search.cpp search/do_alignment.cpp + utility/ibf.cpp + utility/fmindex.cpp + utility/meta.cpp + utility/reference.cpp ) # An object library (without main) to be used in multiple targets. 
# Patch overview (part 2): in src/build/{build,fmindex,ibf}.cpp and src/search/{fmindex,ibf,search}.cpp the inline
# cereal (de)serialisation blocks are replaced by calls to utility::store / utility::load, the per-file dna4_traits
# structs and search::load_index are removed, and seqfile_t is used for sequence file input. The build/ibf.cpp warning
# additionally prints the record ID. The src/search/search.cpp hunk continues past the end of this span.
# NOTE(review): seqfile_t is not defined in any visible hunk — presumably it comes from a newly included header; confirm.
diff --git a/src/build/build.cpp b/src/build/build.cpp index 3ae44e6..d23b3c5 100644 --- a/src/build/build.cpp +++ b/src/build/build.cpp @@ -6,11 +6,9 @@ #include #include -#include - -#include #include #include +#include namespace build { @@ -50,11 +48,7 @@ void build(config const & config) assert(meta.window_size == config.window_size); build::fmindex(config, meta); - { - std::ofstream os{fmt::format("{}.meta", config.output_path.c_str()), std::ios::binary}; - cereal::BinaryOutputArchive oarchive{os}; - oarchive(meta); - } + utility::store(meta, config); } } // namespace build diff --git a/src/build/fmindex.cpp b/src/build/fmindex.cpp index 4c659e4..8494955 100644 --- a/src/build/fmindex.cpp +++ b/src/build/fmindex.cpp @@ -2,29 +2,22 @@ // SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik // SPDX-License-Identifier: BSD-3-Clause -#include - -#include - #include #include +#include +#include namespace build { -struct dna4_traits : seqan3::sequence_file_input_default_traits_dna -{ - using sequence_alphabet = seqan3::dna4; -}; - void read_reference_into(std::vector> & reference, meta & meta, size_t const i) { reference.clear(); for (auto const & bin_path : meta.bin_paths[i]) { - seqan3::sequence_file_input> fin{bin_path}; + seqfile_t fin{bin_path}; for (auto && record : fin) { @@ -56,17 +49,8 @@ void fmindex(config const & config, meta & meta) fmc::BiFMIndex<5> index{reference, /*samplingRate*/ 16, /*threads*/ 1u}; - { - std::ofstream os{fmt::format("{}.{}.fmindex", config.output_path.c_str(), i), std::ios::binary}; - cereal::BinaryOutputArchive oarchive{os}; - oarchive(index); - } - - { - std::ofstream os{fmt::format("{}.{}.ref", config.output_path.c_str(), i), std::ios::binary}; - cereal::BinaryOutputArchive oarchive{os}; - oarchive(reference); - } + utility::store(index, config, i); + utility::store(reference, config, i); } } } diff --git a/src/build/ibf.cpp b/src/build/ibf.cpp index 5ce357b..1273df7 100644 --- a/src/build/ibf.cpp +++ 
b/src/build/ibf.cpp @@ -10,15 +10,11 @@ #include #include #include +#include namespace build { -struct dna4_traits : seqan3::sequence_file_input_default_traits_dna -{ - using sequence_alphabet = seqan3::dna4; -}; - void ibf(config const & config, meta & meta) { meta.kmer_size = config.kmer_size; @@ -26,14 +22,12 @@ void ibf(config const & config, meta & meta) auto get_user_bin_data = [&](size_t const user_bin_id, seqan::hibf::insert_iterator it) { - using sequence_file_t = seqan3::sequence_file_input>; - - auto minimiser_view = - contrib::views::minimiser_hash({.kmer_size = config.kmer_size, .window_size = config.window_size}); + auto minimiser_view = contrib::views::minimiser_hash({.kmer_size = config.kmer_size, // + .window_size = config.window_size}); for (auto && bin_path : meta.bin_paths[user_bin_id]) { - sequence_file_t fin{bin_path}; + seqfile_t fin{bin_path}; for (auto && record : fin) { if (size_t const record_size = record.sequence().size(); record_size < config.window_size) @@ -41,8 +35,8 @@ void ibf(config const & config, meta & meta) #pragma omp critical { std::cerr << colored_strings::cerr::warning << "File " << std::quoted(bin_path) - << " contains a sequence of length " << record_size - << ". This is shorter than the window size (" << config.window_size + << " contains a sequence of length " << record_size << " (ID=" << record.id() + << "). This is shorter than the window size (" << config.window_size << ") and will result in no k-mers being generated for this sequence. 
A user bin " "without k-mers will result in an error.\n"; } @@ -60,11 +54,7 @@ void ibf(config const & config, meta & meta) seqan::hibf::interleaved_bloom_filter ibf{ibf_config}; - { - std::ofstream os{config.output_path.string() + ".ibf", std::ios::binary}; - cereal::BinaryOutputArchive oarchive{os}; - oarchive(ibf); - } + utility::store(ibf, config); } } // namespace build diff --git a/src/search/fmindex.cpp b/src/search/fmindex.cpp index c04c1a7..f2af1b1 100644 --- a/src/search/fmindex.cpp +++ b/src/search/fmindex.cpp @@ -2,34 +2,16 @@ // SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik // SPDX-License-Identifier: BSD-3-Clause -#include - -#include - -#include - #include #include #include #include +#include namespace search { -fmc::BiFMIndex<5> load_index(config const & config, size_t const id) -{ - fmc::BiFMIndex<5> index{}; - - { - std::ifstream os{fmt::format("{}.{}.fmindex", config.input_path.c_str(), id), std::ios::binary}; - cereal::BinaryInputArchive iarchive{os}; - iarchive(index); - } - - return index; -} - void fmindex(config const & config, meta & meta, scq::slotted_cart_queue & filter_queue, @@ -43,7 +25,8 @@ void fmindex(config const & config, if (!cart.valid()) break; auto [slot, span] = cart.get(); - auto index = load_index(config, slot.value); + fmc::BiFMIndex<5> index{}; + utility::load(index, config, slot.value); for (auto idx : span) { auto callback = [&](auto cursor, size_t) diff --git a/src/search/ibf.cpp b/src/search/ibf.cpp index 09d8f06..be19de0 100644 --- a/src/search/ibf.cpp +++ b/src/search/ibf.cpp @@ -11,21 +11,17 @@ #include #include +#include #include namespace search { -struct dna4_traits : seqan3::sequence_file_input_default_traits_dna -{ - using sequence_alphabet = seqan3::dna4; -}; - threshold::threshold get_thresholder(config const & config, meta const & meta) { size_t const first_sequence_size = [&]() { - seqan3::sequence_file_input fin{config.query_path}; + seqfile_t fin{config.query_path}; auto & 
record = *fin.begin(); return record.sequence().size(); }(); @@ -36,18 +32,11 @@ threshold::threshold get_thresholder(config const & config, meta const & meta) .errors = config.errors}}; } -using seqfile_t = seqan3::sequence_file_input>; -using record_t = typename seqfile_t::record_type; - void ibf(config const & config, meta & meta, scq::slotted_cart_queue & filter_queue) { seqan::hibf::interleaved_bloom_filter ibf{}; + utility::load(ibf, config); - { - std::ifstream os{config.input_path.string() + ".ibf", std::ios::binary}; - cereal::BinaryInputArchive iarchive{os}; - iarchive(ibf); - } assert(ibf.bin_count() == meta.number_of_bins); meta.queries = [&]() @@ -64,8 +53,8 @@ void ibf(config const & config, meta & meta, scq::slotted_cart_queue & f { auto agent = ibf.membership_agent(); threshold::threshold const thresholder = get_thresholder(config, meta); - auto minimiser_view = - contrib::views::minimiser_hash({.kmer_size = meta.kmer_size, .window_size = meta.window_size}); + auto minimiser_view = contrib::views::minimiser_hash({.kmer_size = meta.kmer_size, // + .window_size = meta.window_size}); std::vector hashes; diff --git a/src/search/search.cpp b/src/search/search.cpp index dc000f9..31657a7 100644 --- a/src/search/search.cpp +++ b/src/search/search.cpp @@ -2,9 +2,9 @@ // SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik // SPDX-License-Identifier: BSD-3-Clause -#include - #include +#include +#include namespace search { @@ -12,20 +12,11 @@ namespace search void search(config const & config) { meta meta{}; - { - std::ifstream is{fmt::format("{}.meta", config.input_path.c_str()), std::ios::binary}; - cereal::BinaryInputArchive iarchive{is}; - iarchive(meta); - } + utility::load(meta, config); meta.references.resize(meta.number_of_bins); for (size_t i = 0; i < meta.number_of_bins; ++i) - { - - std::ifstream is{fmt::format("{}.{}.ref", config.input_path.c_str(), i), std::ios::binary}; - cereal::BinaryInputArchive iarchive{is}; - 
iarchive(meta.references[i]); - } + utility::load(meta.references[i], config, i); // todo capacity // each slot = 1 bin diff --git a/src/utility/fmindex.cpp b/src/utility/fmindex.cpp new file mode 100644 index 0000000..7433888 --- /dev/null +++ b/src/utility/fmindex.cpp @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#include + +#include + +#include + +#include + +namespace utility +{ + +// Writes `index` as a cereal binary archive to the file "{output_path}.{id}.fmindex". +void store(fmc::BiFMIndex<5> const & index, config const & config, size_t const id) +{ + std::ofstream os{fmt::format("{}.{}.fmindex", config.output_path.c_str(), id), std::ios::binary}; + cereal::BinaryOutputArchive oarchive{os}; + oarchive(index); +} + +// Reads `index` from the cereal binary archive in the file "{input_path}.{id}.fmindex". +void load(fmc::BiFMIndex<5> & index, config const & config, size_t const id) +{ + std::ifstream is{fmt::format("{}.{}.fmindex", config.input_path.c_str(), id), std::ios::binary}; + cereal::BinaryInputArchive iarchive{is}; + iarchive(index); +} + +} // namespace utility diff --git a/src/utility/ibf.cpp b/src/utility/ibf.cpp new file mode 100644 index 0000000..4f9312c --- /dev/null +++ b/src/utility/ibf.cpp @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#include + +#include + +#include + +#include + +namespace utility +{ + +// Writes `ibf` as a cereal binary archive to the file "{output_path}.ibf" (the build output, not the search input). +void store(seqan::hibf::interleaved_bloom_filter const & ibf, config const & config) +{ + std::ofstream os{fmt::format("{}.ibf", config.output_path.c_str()), std::ios::binary}; + cereal::BinaryOutputArchive oarchive{os}; + oarchive(ibf); +} + +// Reads `ibf` from the cereal binary archive in the file "{input_path}.ibf". +void load(seqan::hibf::interleaved_bloom_filter & ibf, config const & config) +{ + std::ifstream is{fmt::format("{}.ibf", config.input_path.string()), std::ios::binary}; + cereal::BinaryInputArchive iarchive{is}; + iarchive(ibf); +} + 
+} // namespace utility diff --git a/src/utility/meta.cpp b/src/utility/meta.cpp new file mode 100644 index 0000000..f063907 --- /dev/null +++ b/src/utility/meta.cpp @@ -0,0 +1,32 @@ +// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#include + +#include + +#include + +#include + +namespace utility +{ + +// Writes `meta` as a cereal binary archive to the file "{output_path}.meta". +void store(meta const & meta, config const & config) +{ + std::ofstream os{fmt::format("{}.meta", config.output_path.c_str()), std::ios::binary}; + cereal::BinaryOutputArchive oarchive{os}; + oarchive(meta); +} + +// Reads `meta` from the cereal binary archive in the file "{input_path}.meta". +void load(meta & meta, config const & config) +{ + std::ifstream is{fmt::format("{}.meta", config.input_path.c_str()), std::ios::binary}; + cereal::BinaryInputArchive iarchive{is}; + iarchive(meta); +} + +} // namespace utility diff --git a/src/utility/reference.cpp b/src/utility/reference.cpp new file mode 100644 index 0000000..248dc59 --- /dev/null +++ b/src/utility/reference.cpp @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +// SPDX-License-Identifier: BSD-3-Clause + +#include + +#include + +#include +#include + +#include + +namespace utility +{ + +// Writes `reference` as a cereal binary archive to the file "{output_path}.{id}.ref". +void store(std::vector> const & reference, config const & config, size_t const id) +{ + std::ofstream os{fmt::format("{}.{}.ref", config.output_path.c_str(), id), std::ios::binary}; + cereal::BinaryOutputArchive oarchive{os}; + oarchive(reference); +} + +// Reads `reference` from the cereal binary archive in the file "{input_path}.{id}.ref". +void load(std::vector> & reference, config const & config, size_t const id) +{ + std::ifstream is{fmt::format("{}.{}.ref", config.input_path.c_str(), id), std::ios::binary}; + cereal::BinaryInputArchive iarchive{is}; + iarchive(reference); +} + +} // namespace utility