Skip to content

Commit d6e1393

Browse files
committed
[UTIL] inspect
1 parent 62110d5 commit d6e1393

File tree

8 files changed

+613
-6
lines changed

8 files changed

+613
-6
lines changed

.clang-format

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,27 +127,31 @@ IncludeCategories:
127127
Priority: 2
128128
SortPriority: 0
129129
CaseSensitive: false
130-
- Regex: '<sharg/'
130+
- Regex: '<fmt/'
131131
Priority: 3
132132
SortPriority: 0
133133
CaseSensitive: false
134-
- Regex: '<seqan3/'
134+
- Regex: '<sharg/'
135135
Priority: 4
136136
SortPriority: 0
137137
CaseSensitive: false
138+
- Regex: '<seqan3/'
139+
Priority: 5
140+
SortPriority: 0
141+
CaseSensitive: false
138142
- Regex: '<hibf/'
139-
Priority: 6
143+
Priority: 7
140144
SortPriority: 0
141145
CaseSensitive: false
142146
- Regex: '<.*>'
143-
Priority: 5
147+
Priority: 6
144148
SortPriority: 0
145149
CaseSensitive: false
146150
- Regex: '.*'
147-
Priority: 7
151+
Priority: 8
148152
SortPriority: 0
149153
CaseSensitive: false
150-
IncludeIsMainRegex: '(Test)?$'
154+
IncludeIsMainRegex: 'x$'
151155
IncludeIsMainSourceRegex: ''
152156
IndentAccessModifiers: false
153157
IndentCaseBlocks: false

include/hibf/interleaved_bloom_filter.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ namespace seqan::hibf
3939
// config.hpp -> misc/insert_iterator.hpp (Needs interleaved_bloom_filter to be a complete class)
4040
struct config;
4141

42+
/*!\brief For testing: An accessor used to inspect private members of seqan::hibf::interleaved_bloom_filter.
43+
* \ingroup ibf
44+
*/
45+
struct inspector;
46+
4247
/*!\brief A strong type that represents the number of bins for the seqan::hibf::interleaved_bloom_filter.
4348
* \ingroup ibf
4449
* \qualifier strong
@@ -148,6 +153,9 @@ class interleaved_bloom_filter : private seqan::hibf::bit_vector
148153
template <typename t>
149154
friend struct cereal::base_class;
150155

156+
//!\brief Allow access to private members. Used for util/inspect.
157+
friend struct seqan::hibf::inspector;
158+
151159
//!\brief The number of bins specified by the user.
152160
size_t bins{};
153161
//!\brief The number of bins stored in the IBF (next multiple of 64 of `bins`).

util/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ add_subdirectory ("${HIBF_ROOT_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/hibf_lib")
1212

1313
# Dependency: Sharg
1414
CPMAddPackage (URI "gh:seqan/sharg-parser#main"
15+
SYSTEM TRUE
1516
OPTIONS "INSTALL_SHARG OFF" "INSTALL_TDL OFF" "CMAKE_MESSAGE_LOG_LEVEL WARNING" "BUILD_TESTING OFF"
1617
"SHARG_NO_TDL ON")
1718

@@ -20,3 +21,19 @@ target_link_libraries (fpr_correction_check seqan::hibf sharg::sharg)
2021

2122
add_executable (hash_collisions hash_collisions.cpp)
2223
target_link_libraries (hash_collisions seqan::hibf sharg::sharg)
24+
25+
set (CMAKE_REQUIRED_QUIET 1)
26+
check_cxx_source_runs ("#include <generator>\nint main(){}" HIBF_HAS_COROUTINE_GENERATOR)
27+
if (HIBF_HAS_COROUTINE_GENERATOR)
28+
# Dependency: fmt
29+
CPMAddPackage (URI "gh:fmtlib/fmt#11.2.0"
30+
SYSTEM TRUE
31+
OPTIONS "FMT_INSTALL OFF" "CMAKE_MESSAGE_LOG_LEVEL WARNING")
32+
33+
add_executable (inspect inspect/inspect.cpp)
34+
target_link_libraries (inspect seqan::hibf sharg::sharg fmt::fmt)
35+
else ()
36+
# Available starting with GCC 14.
37+
# Clang's libc++ does not support std::generator yet (Clang-20).
38+
message (STATUS " Not building inspect because std::generator is not available.")
39+
endif ()

util/inspect/example.md

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
<!--
2+
SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
3+
SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
4+
SPDX-License-Identifier: CC-BY-4.0
5+
-->
6+
7+
# Nomenclature
8+
9+
For an IBF, `User bins` and `Active bins` are the same.
10+
In an HIBF,
11+
* `User bins`: Bins that actually contain the data of a user bin.
12+
* `Active bins`: Merged or deleted bins (see `seqan::hibf::bin_kind`).
13+
14+
# Format
15+
16+
* The data for each IBF is printed in a block.
17+
* The children of an IBF are printed after the parent.
18+
* Indentation levels are used to indicate the hierarchy of IBFs, e.g., the children of an IBF are indented by one level.
19+
* A level of indentation is four spaces.
20+
* The data for an IBF in the HIBF is indented by two spaces.
21+
* The [HIBF with Format Annotations](#hibf-with-format-annotations) section shows an output with explanations/annotations.
22+
23+
# Miscellaneous
24+
25+
* The output is colored on the terminal.
26+
* The output is not colored when redirected/piped.
27+
* Test data may not be up-to-date if IBF/HIBF implementations have changed.
28+
* Test data added: 2025-07-29
29+
30+
# IBF
31+
32+
**Example data:** https://ftp.seqan.de/hibf/util.test.ibf \
33+
**Command:** `./inspect --input util.test.ibf`
34+
35+
```text
36+
User bins: 128
37+
Technical bins: 128
38+
Empty bins: 0
39+
Bin words: 2
40+
Bin size: 1,024
41+
Total size: 131,072
42+
Hash functions: 3
43+
Hash shift: 53
44+
```
45+
46+
# Simple HIBF
47+
48+
**Example data:** https://ftp.seqan.de/hibf/util.test.simple.hibf \
49+
**Command:** `./inspect --input util.test.simple.hibf --hibf`
50+
51+
```text
52+
User bins: 2
53+
Empty bins: 0
54+
IBFs: 1
55+
Total size: 4,112
56+
ID: 0 Level: 0 Children: 0
57+
User bins: 2
58+
Active bins: 64
59+
Technical bins: 64
60+
Empty bins: 0
61+
Bin words: 1
62+
Bin size: 48
63+
Total size: 3,072
64+
Hash functions: 2
65+
Hash shift: 58
66+
```
67+
68+
# HIBF
69+
70+
**Example data:** https://ftp.seqan.de/hibf/util.test.hibf \
71+
**Command:** `./inspect --input util.test.hibf --hibf`
72+
73+
```text
74+
User bins: 4,097
75+
Empty bins: 0
76+
IBFs: 66
77+
Total size: 5,362,464
78+
ID: 0 Level: 0 Children: 64
79+
User bins: 0
80+
Active bins: 64
81+
Technical bins: 64
82+
Empty bins: 0
83+
Bin words: 1
84+
Bin size: 2,642
85+
Total size: 169,088
86+
Hash functions: 2
87+
Hash shift: 52
88+
ID: 3 Level: 1 Children: 0
89+
User bins: 64
90+
Active bins: 64
91+
Technical bins: 64
92+
Empty bins: 0
93+
Bin words: 1
94+
Bin size: 1,245
95+
Total size: 79,680
96+
Hash functions: 2
97+
Hash shift: 53
98+
[...]
99+
ID: 1 Level: 1 Children: 1
100+
User bins: 63
101+
Active bins: 64
102+
Technical bins: 64
103+
Empty bins: 0
104+
Bin words: 1
105+
Bin size: 1,245
106+
Total size: 79,680
107+
Hash functions: 2
108+
Hash shift: 53
109+
ID: 2 Level: 2 Children: 0
110+
User bins: 2
111+
Active bins: 64
112+
Technical bins: 64
113+
Empty bins: 0
114+
Bin words: 1
115+
Bin size: 394
116+
Total size: 25,216
117+
Hash functions: 2
118+
Hash shift: 55
119+
```
120+
121+
# HIBF with Format Annotations
122+
123+
```text
124+
User bins: 4,097 ──────┐
125+
Empty bins: 0 ├ HIBF Metadata
126+
IBFs: 66 │
127+
Total size: 5,362,464 ─┘
128+
ID: 0 Level: 0 Children: 64 ─┬── Root IBF
129+
User bins: 0 │
130+
Active bins: 64 │
131+
Technical bins: 64 │
132+
Empty bins: 0 │
133+
Bin words: 1 ├ Root IBF Metadata
134+
Bin size: 2,642 │
135+
Total size: 169,088 │
136+
Hash functions: 2 │
137+
Hash shift: 52 ─────────────┘
138+
ID: 3 Level: 1 Children: 0 ──────┐
139+
User bins: 64 │
140+
Active bins: 64 │
141+
Technical bins: 64 │
142+
Empty bins: 0 │
143+
Bin words: 1 │
144+
Bin size: 1,245 │
145+
Total size: 79,680 │
146+
Hash functions: 2 │
147+
Hash shift: 53 │
148+
[...] ├ Direct Children of the Root IBF
149+
ID: 1 Level: 1 Children: 1 │
150+
User bins: 63 │
151+
Active bins: 64 │
152+
Technical bins: 64 │
153+
Empty bins: 0 │
154+
Bin words: 1 │
155+
Bin size: 1,245 │
156+
Total size: 79,680 │
157+
Hash functions: 2 │
158+
Hash shift: 53 ─────────────────┘
159+
ID: 2 Level: 2 Children: 0 ─┐
160+
User bins: 2 │
161+
Active bins: 64 │
162+
Technical bins: 64 │
163+
Empty bins: 0 │
164+
Bin words: 1 ├ Child of IBF with ID=1
165+
Bin size: 394 │
166+
Total size: 25,216 │
167+
Hash functions: 2 │
168+
Hash shift: 55 ────────────┘
169+
```

util/inspect/fmt.hpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#pragma once
6+
7+
#include <fmt/color.h>
8+
9+
#include <sharg/detail/terminal.hpp>
10+
11+
namespace seqan::hibf::util
12+
{
13+
14+
struct styles
15+
{
16+
static inline bool const cout_is_terminal = sharg::detail::stdout_is_terminal();
17+
static inline bool const cerr_is_terminal = sharg::detail::stderr_is_terminal();
18+
19+
static bool use_style(FILE * stream)
20+
{
21+
return (stream == stdout && cout_is_terminal) || (stream == stderr && cerr_is_terminal);
22+
}
23+
24+
static fmt::text_style color(fmt::color const color, FILE * stream = stdout)
25+
{
26+
if (use_style(stream))
27+
return fmt::fg(color);
28+
else
29+
return fmt::text_style{};
30+
}
31+
32+
static fmt::text_style emphasis(fmt::emphasis const emphasis, FILE * stream = stdout)
33+
{
34+
if (use_style(stream))
35+
return emphasis;
36+
else
37+
return fmt::text_style{};
38+
}
39+
};
40+
41+
} // namespace seqan::hibf::util

util/inspect/inspect.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// SPDX-FileCopyrightText: 2006-2025, Knut Reinert & Freie Universität Berlin
2+
// SPDX-FileCopyrightText: 2016-2025, Knut Reinert & MPI für molekulare Genetik
3+
// SPDX-License-Identifier: BSD-3-Clause
4+
5+
#include <sharg/parser.hpp>
6+
7+
#include <cereal/archives/binary.hpp>
8+
9+
#include <hibf/hierarchical_interleaved_bloom_filter.hpp>
10+
11+
#include "fmt.hpp"
12+
#include "inspect.hpp"
13+
14+
//!\brief The command line options of the inspect utility.
struct config
{
    //!\brief Path of the index to inspect; set via `--input`.
    std::filesystem::path input{};
    //!\brief Whether the `--hibf` flag was given, i.e., the index is treated as an HIBF instead of an IBF.
    bool is_hibf{false};
};
19+
20+
/*!\brief Parses the command line and returns the resulting configuration.
 * \param command_line The program arguments as passed to `main`, i.e., starting with the program name.
 * \returns The configuration filled with the values given on the command line.
 * \details
 * Exceptions thrown by the parser are not caught here; they propagate to the caller.
 */
config parse_arguments(std::vector<std::string> command_line)
{
    sharg::parser parser{"inspect", std::move(command_line), sharg::update_notifications::off};
    config cfg{};

    // `--input` is mandatory: without it, `cfg.input` stays empty and loading the index would only fail later
    // with a message that does not point to the missing option.
    parser.add_option(cfg.input,
                      sharg::config{.short_id = '\0',
                                    .long_id = "input",
                                    .description = "The index to inspect.",
                                    .required = true,
                                    .validator = sharg::input_file_validator{}});

    parser.add_flag(cfg.is_hibf,
                    sharg::config{.short_id = '\0', .long_id = "hibf", .description = "The index is an HIBF"});

    parser.info.author = "Enrico Seiler";
    parser.info.short_copyright = "BSD 3-Clause License";
    parser.info.short_description = "Inspect an IBF/HIBF.";
    parser.parse();

    return cfg;
}
41+
42+
int main(int argc, char ** argv)
43+
{
44+
try
45+
{
46+
config const config = parse_arguments({argv, argv + argc});
47+
48+
if (!config.is_hibf)
49+
{
50+
seqan::hibf::interleaved_bloom_filter ibf{};
51+
{
52+
std::ifstream os{config.input, std::ios::binary};
53+
cereal::BinaryInputArchive iarchive{os};
54+
iarchive(ibf);
55+
}
56+
57+
seqan::hibf::util::inspect(ibf);
58+
}
59+
else
60+
{
61+
seqan::hibf::hierarchical_interleaved_bloom_filter hibf{};
62+
{
63+
std::ifstream os{config.input, std::ios::binary};
64+
cereal::BinaryInputArchive iarchive{os};
65+
iarchive(hibf);
66+
}
67+
68+
seqan::hibf::util::inspect(hibf);
69+
}
70+
}
71+
catch (std::exception const & ext)
72+
{
73+
fmt::print(stderr, seqan::hibf::util::styles::color(fmt::color::red, stderr), "[Error]");
74+
fmt::print(stderr, " {}\n", ext.what());
75+
std::exit(-1);
76+
}
77+
78+
return 0;
79+
}

0 commit comments

Comments
 (0)