Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
# SPDX-License-Identifier: CC0-1.0

# Format all files: find . -iname "*.[ch]pp" -not -path "./build/*" | xargs clang-format-19 --style=file -i
# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-19 --style=file -i
# Format all files: find . -iname "*.[ch]pp" -not -path "./build/*" | xargs clang-format-20 --style=file -i
# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-20 --style=file -i
---
Language: Cpp
AccessModifierOffset: -4
Expand Down Expand Up @@ -128,8 +128,10 @@ IncludeCategories:
Priority: 6
- Regex: '<fmindex-collection/'
Priority: 7
- Regex: '.*'
- Regex: '<cereal/'
Priority: 8
- Regex: '.*'
Priority: 9
IncludeIsMainRegex: '(Test)?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
Expand Down
18 changes: 18 additions & 0 deletions include/fpgalign/utility/fmindex.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <fmindex-collection/fmindex/BiFMIndex.h>

#include <fpgalign/config.hpp>

namespace utility
{

/*!\brief Writes one bidirectional FM-index to disk.
 * \param[in] index  The index to serialise.
 * \param[in] config Program configuration; expected to supply the output location.
 * \param[in] id     Number distinguishing this index from the others; the build step passes its loop counter.
 * \details
 * Only the declaration lives here; the definition is compiled from `utility/fmindex.cpp`.
 * NOTE(review): the inline code this function replaced wrote `<config.output_path>.<id>.fmindex` through a
 * cereal binary output archive and did not check whether the stream opened. Presumably the definition keeps
 * that file naming - confirm, and confirm how I/O failures are reported.
 */
void store(fmc::BiFMIndex<5> const & index, config const & config, size_t const id);

/*!\brief Reads one bidirectional FM-index from disk into `index`.
 * \param[out] index  Receives the deserialised index; the search step passes a default-constructed object.
 * \param[in]  config Program configuration; expected to supply the input location.
 * \param[in]  id     Number selecting which index to read; the search step passes the slot value of a queue cart.
 * \details
 * NOTE(review): the inline code this function replaced read `<config.input_path>.<id>.fmindex` through a
 * cereal binary input archive. Presumably the definition in `utility/fmindex.cpp` does the same - confirm.
 */
void load(fmc::BiFMIndex<5> & index, config const & config, size_t const id);

} // namespace utility
18 changes: 18 additions & 0 deletions include/fpgalign/utility/ibf.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <hibf/interleaved_bloom_filter.hpp>

#include <fpgalign/config.hpp>

namespace utility
{

/*!\brief Writes the interleaved Bloom filter to disk.
 * \param[in] ibf    The filter to serialise.
 * \param[in] config Program configuration; expected to supply the output location.
 * \details
 * Only the declaration lives here; the definition is compiled from `utility/ibf.cpp`.
 * NOTE(review): the inline code this function replaced wrote `<config.output_path>.ibf` through a cereal
 * binary output archive without checking the stream state. Presumably the definition keeps that file
 * naming - confirm, and confirm how I/O failures are reported.
 */
void store(seqan::hibf::interleaved_bloom_filter const & ibf, config const & config);

/*!\brief Reads the interleaved Bloom filter from disk into `ibf`.
 * \param[out] ibf    Receives the deserialised filter; the search step passes a default-constructed object.
 * \param[in]  config Program configuration; expected to supply the input location.
 * \details
 * NOTE(review): the inline code this function replaced read `<config.input_path>.ibf` through a cereal
 * binary input archive. Presumably the definition in `utility/ibf.cpp` does the same - confirm.
 */
void load(seqan::hibf::interleaved_bloom_filter & ibf, config const & config);

} // namespace utility
17 changes: 17 additions & 0 deletions include/fpgalign/utility/meta.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <fpgalign/config.hpp>
#include <fpgalign/meta.hpp>

namespace utility
{

/*!\brief Writes the meta information to disk.
 * \param[in] meta   The meta object to serialise; the build step calls this after the FM-indices were built.
 * \param[in] config Program configuration; expected to supply the output location.
 * \details
 * Only the declaration lives here; the definition is compiled from `utility/meta.cpp`.
 * NOTE(review): the inline code this function replaced wrote `<config.output_path>.meta` through a cereal
 * binary output archive without checking the stream state. Presumably the definition keeps that file
 * naming - confirm, and confirm how I/O failures are reported.
 */
void store(meta const & meta, config const & config);

/*!\brief Reads the meta information from disk into `meta`.
 * \param[out] meta   Receives the deserialised data; the search step passes a value-initialised object.
 * \param[in]  config Program configuration; expected to supply the input location.
 * \details
 * NOTE(review): the inline code this function replaced read `<config.input_path>.meta` through a cereal
 * binary input archive. Presumably the definition in `utility/meta.cpp` does the same - confirm.
 */
void load(meta & meta, config const & config);

} // namespace utility
20 changes: 20 additions & 0 deletions include/fpgalign/utility/reference.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#pragma once

#include <cstddef>
#include <cstdint>
#include <vector>

#include <fpgalign/config.hpp>

namespace utility
{

/*!\brief Writes the reference sequences belonging to one index to disk.
 * \param[in] reference The sequences to serialise, one inner vector of 8-bit values per sequence.
 * \param[in] config    Program configuration; expected to supply the output location.
 * \param[in] id        Number distinguishing this set of sequences; the build step passes its loop counter.
 * \details
 * Only the declaration lives here; the definition is compiled from `utility/reference.cpp`.
 * NOTE(review): the inline code this function replaced wrote `<config.output_path>.<id>.ref` through a
 * cereal binary output archive without checking the stream state. Presumably the definition keeps that
 * file naming - confirm, and confirm how I/O failures are reported.
 */
void store(std::vector<std::vector<uint8_t>> const & reference, config const & config, size_t const id);

/*!\brief Reads the reference sequences belonging to one index from disk into `reference`.
 * \param[out] reference Receives the deserialised sequences; the search step passes `meta.references[i]`.
 * \param[in]  config    Program configuration; expected to supply the input location.
 * \param[in]  id        Number selecting which set of sequences to read; the search step passes the bin number.
 * \details
 * NOTE(review): the inline code this function replaced read `<config.input_path>.<id>.ref` through a
 * cereal binary input archive. Presumably the definition in `utility/reference.cpp` does the same - confirm.
 */
void load(std::vector<std::vector<uint8_t>> & reference, config const & config, size_t const id);

} // namespace utility
4 changes: 4 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ set (FPGAlign_SOURCE_FILES
search/fmindex.cpp
search/search.cpp
search/do_alignment.cpp
utility/ibf.cpp
utility/fmindex.cpp
utility/meta.cpp
utility/reference.cpp
)

# An object library (without main) to be used in multiple targets.
Expand Down
10 changes: 2 additions & 8 deletions src/build/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
#include <fstream>
#include <sstream>

#include <fmt/format.h>

#include <cereal/archives/binary.hpp>
#include <fpgalign/build/build.hpp>
#include <fpgalign/meta.hpp>
#include <fpgalign/utility/meta.hpp>

namespace build
{
Expand Down Expand Up @@ -50,11 +48,7 @@ void build(config const & config)
assert(meta.window_size == config.window_size);
build::fmindex(config, meta);

{
std::ofstream os{fmt::format("{}.meta", config.output_path.c_str()), std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
oarchive(meta);
}
utility::store(meta, config);
}

} // namespace build
26 changes: 5 additions & 21 deletions src/build/fmindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,22 @@
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#include <fmt/format.h>

#include <seqan3/io/sequence_file/input.hpp>

#include <fmindex-collection/fmindex/BiFMIndex.h>

#include <fpgalign/build/build.hpp>
#include <fpgalign/utility/fmindex.hpp>
#include <fpgalign/utility/reference.hpp>

namespace build
{

struct dna4_traits : seqan3::sequence_file_input_default_traits_dna
{
using sequence_alphabet = seqan3::dna4;
};

void read_reference_into(std::vector<std::vector<uint8_t>> & reference, meta & meta, size_t const i)
{
reference.clear();

for (auto const & bin_path : meta.bin_paths[i])
{
seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::seq, seqan3::field::id>> fin{bin_path};
seqfile_t fin{bin_path};

for (auto && record : fin)
{
Expand Down Expand Up @@ -56,17 +49,8 @@ void fmindex(config const & config, meta & meta)

fmc::BiFMIndex<5> index{reference, /*samplingRate*/ 16, /*threads*/ 1u};

{
std::ofstream os{fmt::format("{}.{}.fmindex", config.output_path.c_str(), i), std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
oarchive(index);
}

{
std::ofstream os{fmt::format("{}.{}.ref", config.output_path.c_str(), i), std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
oarchive(reference);
}
utility::store(index, config, i);
utility::store(reference, config, i);
}
}
}
Expand Down
24 changes: 7 additions & 17 deletions src/build/ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,33 @@
#include <fpgalign/build/build.hpp>
#include <fpgalign/colored_strings.hpp>
#include <fpgalign/contrib/minimiser_hash.hpp>
#include <fpgalign/utility/ibf.hpp>

namespace build
{

struct dna4_traits : seqan3::sequence_file_input_default_traits_dna
{
using sequence_alphabet = seqan3::dna4;
};

void ibf(config const & config, meta & meta)
{
meta.kmer_size = config.kmer_size;
meta.window_size = config.window_size;

auto get_user_bin_data = [&](size_t const user_bin_id, seqan::hibf::insert_iterator it)
{
using sequence_file_t = seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::seq>>;

auto minimiser_view =
contrib::views::minimiser_hash({.kmer_size = config.kmer_size, .window_size = config.window_size});
auto minimiser_view = contrib::views::minimiser_hash({.kmer_size = config.kmer_size, //
.window_size = config.window_size});

for (auto && bin_path : meta.bin_paths[user_bin_id])
{
sequence_file_t fin{bin_path};
seqfile_t fin{bin_path};
for (auto && record : fin)
{
if (size_t const record_size = record.sequence().size(); record_size < config.window_size)
{
#pragma omp critical
{
std::cerr << colored_strings::cerr::warning << "File " << std::quoted(bin_path)
<< " contains a sequence of length " << record_size
<< ". This is shorter than the window size (" << config.window_size
<< " contains a sequence of length " << record_size << " (ID=" << record.id()
<< "). This is shorter than the window size (" << config.window_size
<< ") and will result in no k-mers being generated for this sequence. A user bin "
"without k-mers will result in an error.\n";
}
Expand All @@ -60,11 +54,7 @@ void ibf(config const & config, meta & meta)

seqan::hibf::interleaved_bloom_filter ibf{ibf_config};

{
std::ofstream os{config.output_path.string() + ".ibf", std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
oarchive(ibf);
}
utility::store(ibf, config);
}

} // namespace build
23 changes: 3 additions & 20 deletions src/search/fmindex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,16 @@
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#include <fmt/format.h>

#include <seqan3/io/sequence_file/input.hpp>

#include <hibf/contrib/std/enumerate_view.hpp>

#include <fmindex-collection/fmindex/BiFMIndex.h>
#include <fmindex-collection/search/search.h>

#include <fpgalign/contrib/slotted_cart_queue.hpp>
#include <fpgalign/search/search.hpp>
#include <fpgalign/utility/fmindex.hpp>

namespace search
{

fmc::BiFMIndex<5> load_index(config const & config, size_t const id)
{
fmc::BiFMIndex<5> index{};

{
std::ifstream os{fmt::format("{}.{}.fmindex", config.input_path.c_str(), id), std::ios::binary};
cereal::BinaryInputArchive iarchive{os};
iarchive(index);
}

return index;
}

void fmindex(config const & config,
meta & meta,
scq::slotted_cart_queue<size_t> & filter_queue,
Expand All @@ -43,7 +25,8 @@ void fmindex(config const & config,
if (!cart.valid())
break;
auto [slot, span] = cart.get();
auto index = load_index(config, slot.value);
fmc::BiFMIndex<5> index{};
utility::load(index, config, slot.value);
for (auto idx : span)
{
auto callback = [&](auto cursor, size_t)
Expand Down
21 changes: 5 additions & 16 deletions src/search/ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,17 @@

#include <fpgalign/contrib/minimiser_hash.hpp>
#include <fpgalign/search/search.hpp>
#include <fpgalign/utility/ibf.hpp>
#include <threshold/threshold.hpp>

namespace search
{

struct dna4_traits : seqan3::sequence_file_input_default_traits_dna
{
using sequence_alphabet = seqan3::dna4;
};

threshold::threshold get_thresholder(config const & config, meta const & meta)
{
size_t const first_sequence_size = [&]()
{
seqan3::sequence_file_input<dna4_traits> fin{config.query_path};
seqfile_t fin{config.query_path};
auto & record = *fin.begin();
return record.sequence().size();
}();
Expand All @@ -36,18 +32,11 @@ threshold::threshold get_thresholder(config const & config, meta const & meta)
.errors = config.errors}};
}

using seqfile_t = seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::id, seqan3::field::seq>>;
using record_t = typename seqfile_t::record_type;

void ibf(config const & config, meta & meta, scq::slotted_cart_queue<size_t> & filter_queue)
{
seqan::hibf::interleaved_bloom_filter ibf{};
utility::load(ibf, config);

{
std::ifstream os{config.input_path.string() + ".ibf", std::ios::binary};
cereal::BinaryInputArchive iarchive{os};
iarchive(ibf);
}
assert(ibf.bin_count() == meta.number_of_bins);

meta.queries = [&]()
Expand All @@ -64,8 +53,8 @@ void ibf(config const & config, meta & meta, scq::slotted_cart_queue<size_t> & f
{
auto agent = ibf.membership_agent();
threshold::threshold const thresholder = get_thresholder(config, meta);
auto minimiser_view =
contrib::views::minimiser_hash({.kmer_size = meta.kmer_size, .window_size = meta.window_size});
auto minimiser_view = contrib::views::minimiser_hash({.kmer_size = meta.kmer_size, //
.window_size = meta.window_size});

std::vector<uint64_t> hashes;

Expand Down
17 changes: 4 additions & 13 deletions src/search/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,21 @@
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause

#include <fmt/format.h>

#include <fpgalign/search/search.hpp>
#include <fpgalign/utility/meta.hpp>
#include <fpgalign/utility/reference.hpp>

namespace search
{

void search(config const & config)
{
meta meta{};
{
std::ifstream is{fmt::format("{}.meta", config.input_path.c_str()), std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
iarchive(meta);
}
utility::load(meta, config);

meta.references.resize(meta.number_of_bins);
for (size_t i = 0; i < meta.number_of_bins; ++i)
{

std::ifstream is{fmt::format("{}.{}.ref", config.input_path.c_str(), i), std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
iarchive(meta.references[i]);
}
utility::load(meta.references[i], config, i);

// todo capacity
// each slot = 1 bin
Expand Down
Loading