Skip to content

Commit 619fb7b

Browse files
authored
Merge pull request #7 from eseiler/misc/refactor
Refactor functions a bit
2 parents 554e324 + 11178dd commit 619fb7b

File tree

16 files changed

+229
-98
lines changed

16 files changed

+229
-98
lines changed

.clang-format

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
# SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
33
# SPDX-License-Identifier: CC0-1.0
44

5-
# Format all files: find . -iname "*.[ch]pp" -not -path "./build/*" | xargs clang-format-19 --style=file -i
6-
# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-19 --style=file -i
5+
# Format all files: find . -iname "*.[ch]pp" -not -path "./build/*" | xargs clang-format-20 --style=file -i
6+
# Staged files: git diff --name-only HEAD --diff-filter=ACMRT | grep -E "(\.cpp|\.hpp)$" | xargs clang-format-20 --style=file -i
77
---
88
Language: Cpp
99
AccessModifierOffset: -4
@@ -128,8 +128,10 @@ IncludeCategories:
128128
Priority: 6
129129
- Regex: '<fmindex-collection/'
130130
Priority: 7
131-
- Regex: '.*'
131+
- Regex: '<cereal/'
132132
Priority: 8
133+
- Regex: '.*'
134+
Priority: 9
133135
IncludeIsMainRegex: '(Test)?$'
134136
IncludeIsMainSourceRegex: ''
135137
IndentAccessModifiers: false
include/fpgalign/utility/fmindex.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#pragma once
6+
7+
#include <fmindex-collection/fmindex/BiFMIndex.h>
8+
9+
#include <fpgalign/config.hpp>
10+
11+
namespace utility
12+
{
13+
14+
void store(fmc::BiFMIndex<5> const & index, config const & config, size_t const id);
15+
16+
void load(fmc::BiFMIndex<5> & index, config const & config, size_t const id);
17+
18+
} // namespace utility

include/fpgalign/utility/ibf.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#pragma once
6+
7+
#include <hibf/interleaved_bloom_filter.hpp>
8+
9+
#include <fpgalign/config.hpp>
10+
11+
namespace utility
12+
{
13+
14+
void store(seqan::hibf::interleaved_bloom_filter const & ibf, config const & config);
15+
16+
void load(seqan::hibf::interleaved_bloom_filter & ibf, config const & config);
17+
18+
} // namespace utility

include/fpgalign/utility/meta.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#pragma once
6+
7+
#include <fpgalign/config.hpp>
8+
#include <fpgalign/meta.hpp>
9+
10+
namespace utility
11+
{
12+
13+
void store(meta const & meta, config const & config);
14+
15+
void load(meta & meta, config const & config);
16+
17+
} // namespace utility
include/fpgalign/utility/reference.hpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#pragma once
6+
7+
#include <cstddef>
8+
#include <cstdint>
9+
#include <vector>
10+
11+
#include <fpgalign/config.hpp>
12+
13+
namespace utility
14+
{
15+
16+
void store(std::vector<std::vector<uint8_t>> const & reference, config const & config, size_t const id);
17+
18+
void load(std::vector<std::vector<uint8_t>> & reference, config const & config, size_t const id);
19+
20+
} // namespace utility

src/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ set (FPGAlign_SOURCE_FILES
1414
search/fmindex.cpp
1515
search/search.cpp
1616
search/do_alignment.cpp
17+
utility/ibf.cpp
18+
utility/fmindex.cpp
19+
utility/meta.cpp
20+
utility/reference.cpp
1721
)
1822

1923
# An object library (without main) to be used in multiple targets.

src/build/build.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@
66
#include <fstream>
77
#include <sstream>
88

9-
#include <fmt/format.h>
10-
11-
#include <cereal/archives/binary.hpp>
129
#include <fpgalign/build/build.hpp>
1310
#include <fpgalign/meta.hpp>
11+
#include <fpgalign/utility/meta.hpp>
1412

1513
namespace build
1614
{
@@ -50,11 +48,7 @@ void build(config const & config)
5048
assert(meta.window_size == config.window_size);
5149
build::fmindex(config, meta);
5250

53-
{
54-
std::ofstream os{fmt::format("{}.meta", config.output_path.c_str()), std::ios::binary};
55-
cereal::BinaryOutputArchive oarchive{os};
56-
oarchive(meta);
57-
}
51+
utility::store(meta, config);
5852
}
5953

6054
} // namespace build

src/build/fmindex.cpp

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,22 @@
22
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
33
// SPDX-License-Identifier: BSD-3-Clause
44

5-
#include <fmt/format.h>
6-
7-
#include <seqan3/io/sequence_file/input.hpp>
8-
95
#include <fmindex-collection/fmindex/BiFMIndex.h>
106

117
#include <fpgalign/build/build.hpp>
8+
#include <fpgalign/utility/fmindex.hpp>
9+
#include <fpgalign/utility/reference.hpp>
1210

1311
namespace build
1412
{
1513

16-
struct dna4_traits : seqan3::sequence_file_input_default_traits_dna
17-
{
18-
using sequence_alphabet = seqan3::dna4;
19-
};
20-
2114
void read_reference_into(std::vector<std::vector<uint8_t>> & reference, meta & meta, size_t const i)
2215
{
2316
reference.clear();
2417

2518
for (auto const & bin_path : meta.bin_paths[i])
2619
{
27-
seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::seq, seqan3::field::id>> fin{bin_path};
20+
seqfile_t fin{bin_path};
2821

2922
for (auto && record : fin)
3023
{
@@ -56,17 +49,8 @@ void fmindex(config const & config, meta & meta)
5649

5750
fmc::BiFMIndex<5> index{reference, /*samplingRate*/ 16, /*threads*/ 1u};
5851

59-
{
60-
std::ofstream os{fmt::format("{}.{}.fmindex", config.output_path.c_str(), i), std::ios::binary};
61-
cereal::BinaryOutputArchive oarchive{os};
62-
oarchive(index);
63-
}
64-
65-
{
66-
std::ofstream os{fmt::format("{}.{}.ref", config.output_path.c_str(), i), std::ios::binary};
67-
cereal::BinaryOutputArchive oarchive{os};
68-
oarchive(reference);
69-
}
52+
utility::store(index, config, i);
53+
utility::store(reference, config, i);
7054
}
7155
}
7256
}

src/build/ibf.cpp

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,39 +10,33 @@
1010
#include <fpgalign/build/build.hpp>
1111
#include <fpgalign/colored_strings.hpp>
1212
#include <fpgalign/contrib/minimiser_hash.hpp>
13+
#include <fpgalign/utility/ibf.hpp>
1314

1415
namespace build
1516
{
1617

17-
struct dna4_traits : seqan3::sequence_file_input_default_traits_dna
18-
{
19-
using sequence_alphabet = seqan3::dna4;
20-
};
21-
2218
void ibf(config const & config, meta & meta)
2319
{
2420
meta.kmer_size = config.kmer_size;
2521
meta.window_size = config.window_size;
2622

2723
auto get_user_bin_data = [&](size_t const user_bin_id, seqan::hibf::insert_iterator it)
2824
{
29-
using sequence_file_t = seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::seq>>;
30-
31-
auto minimiser_view =
32-
contrib::views::minimiser_hash({.kmer_size = config.kmer_size, .window_size = config.window_size});
25+
auto minimiser_view = contrib::views::minimiser_hash({.kmer_size = config.kmer_size, //
26+
.window_size = config.window_size});
3327

3428
for (auto && bin_path : meta.bin_paths[user_bin_id])
3529
{
36-
sequence_file_t fin{bin_path};
30+
seqfile_t fin{bin_path};
3731
for (auto && record : fin)
3832
{
3933
if (size_t const record_size = record.sequence().size(); record_size < config.window_size)
4034
{
4135
#pragma omp critical
4236
{
4337
std::cerr << colored_strings::cerr::warning << "File " << std::quoted(bin_path)
44-
<< " contains a sequence of length " << record_size
45-
<< ". This is shorter than the window size (" << config.window_size
38+
<< " contains a sequence of length " << record_size << " (ID=" << record.id()
39+
<< "). This is shorter than the window size (" << config.window_size
4640
<< ") and will result in no k-mers being generated for this sequence. A user bin "
4741
"without k-mers will result in an error.\n";
4842
}
@@ -60,11 +54,7 @@ void ibf(config const & config, meta & meta)
6054

6155
seqan::hibf::interleaved_bloom_filter ibf{ibf_config};
6256

63-
{
64-
std::ofstream os{config.output_path.string() + ".ibf", std::ios::binary};
65-
cereal::BinaryOutputArchive oarchive{os};
66-
oarchive(ibf);
67-
}
57+
utility::store(ibf, config);
6858
}
6959

7060
} // namespace build

src/search/fmindex.cpp

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,34 +2,16 @@
22
// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
33
// SPDX-License-Identifier: BSD-3-Clause
44

5-
#include <fmt/format.h>
6-
7-
#include <seqan3/io/sequence_file/input.hpp>
8-
9-
#include <hibf/contrib/std/enumerate_view.hpp>
10-
115
#include <fmindex-collection/fmindex/BiFMIndex.h>
126
#include <fmindex-collection/search/search.h>
137

148
#include <fpgalign/contrib/slotted_cart_queue.hpp>
159
#include <fpgalign/search/search.hpp>
10+
#include <fpgalign/utility/fmindex.hpp>
1611

1712
namespace search
1813
{
1914

20-
fmc::BiFMIndex<5> load_index(config const & config, size_t const id)
21-
{
22-
fmc::BiFMIndex<5> index{};
23-
24-
{
25-
std::ifstream os{fmt::format("{}.{}.fmindex", config.input_path.c_str(), id), std::ios::binary};
26-
cereal::BinaryInputArchive iarchive{os};
27-
iarchive(index);
28-
}
29-
30-
return index;
31-
}
32-
3315
void fmindex(config const & config,
3416
meta & meta,
3517
scq::slotted_cart_queue<size_t> & filter_queue,
@@ -43,7 +25,8 @@ void fmindex(config const & config,
4325
if (!cart.valid())
4426
break;
4527
auto [slot, span] = cart.get();
46-
auto index = load_index(config, slot.value);
28+
fmc::BiFMIndex<5> index{};
29+
utility::load(index, config, slot.value);
4730
for (auto idx : span)
4831
{
4932
auto callback = [&](auto cursor, size_t)

0 commit comments

Comments
 (0)