Skip to content

Commit 24f4947

Browse files
committed
[INFRA] Default native build, update readme
1 parent f00d56f commit 24f4947

File tree

5 files changed

+79
-22
lines changed

5 files changed

+79
-22
lines changed

CMakeLists.txt

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ cmake_minimum_required (VERSION 3.8)
1010
# Define the application name and version.
1111
project (raptor VERSION 2.0.0)
1212

13+
# Messages
14+
string (ASCII 27 Esc)
15+
set (FontBold "${Esc}[1m")
16+
set (FontReset "${Esc}[m")
17+
1318
# Fallback to these values if there is no git or no git repository
1419
set (RAPTOR_COMMIT_DATE "2021-08-20--no-git"
1520
CACHE STRING
@@ -52,24 +57,32 @@ if (NOT CMAKE_BUILD_TYPE)
5257
FORCE)
5358
endif ()
5459

60+
set (RAPTOR_NATIVE_BUILD ON CACHE BOOL "Optimize build for current architecture.")
61+
if (RAPTOR_NATIVE_BUILD)
62+
message (STATUS "${FontBold}Native build enabled. Built binaries will be optimized for this system.${FontReset}")
63+
# Tune code generation for the CPU of the machine running the build.
string (APPEND CMAKE_CXX_FLAGS " -march=native")
64+
else ()
65+
message (STATUS "${FontBold}Native build disabled. Detecting popcnt support.${FontReset}")
66+
include (CheckCXXCompilerFlag)
67+
check_cxx_compiler_flag ("-mpopcnt" RAPTOR_HAS_POPCNT)
68+
if (RAPTOR_HAS_POPCNT)
69+
# The compiler accepted -mpopcnt in the check above, so enable it for all C++ sources.
string (APPEND CMAKE_CXX_FLAGS " -mpopcnt")
70+
endif ()
71+
endif ()
72+
5573
# Specify the directories where to store the built archives, libraries and executables
5674
set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
5775
set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
5876
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
5977

60-
# Messages
61-
string (ASCII 27 Esc)
62-
set (FontBold "${Esc}[1m")
63-
set (FontReset "${Esc}[m")
64-
6578
# Dependency: SeqAn3.
6679
set (SEQAN3_CEREAL ON CACHE BOOL "Require cereal to be present.")
6780
set (SEQAN3_SUBMODULES_DIR "lib")
6881
find_package (SeqAn3 QUIET REQUIRED HINTS lib/seqan3/build_system)
6982

7083
# Use ccache.
71-
set (USE_CCACHE ON CACHE BOOL "Use ccache if available.")
72-
if (USE_CCACHE)
84+
set (RAPTOR_USE_CCACHE ON CACHE BOOL "Use ccache if available.")
85+
if (RAPTOR_USE_CCACHE)
7386
include ("${SEQAN3_CLONE_DIR}/test/cmake/seqan3_require_ccache.cmake")
7487
seqan3_require_ccache ()
7588
endif ()

README.md

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
1-
# Raptor [![build status](https://github.com/seqan/raptor/workflows/Raptor%20CI/badge.svg?branch=master)](https://github.com/seqan/raptor/actions) [![codecov](https://codecov.io/gh/seqan/raptor/branch/master/graph/badge.svg?token=SJVMYRUKW2)](https://codecov.io/gh/seqan/raptor)
1+
# Raptor [![build status][1]][2] [![codecov][3]][4] [![install with bioconda][5]][6]
2+
3+
[1]: https://github.com/seqan/raptor/workflows/Raptor%20CI/badge.svg?branch=master
4+
[2]: https://github.com/seqan/raptor/actions
5+
[3]: https://codecov.io/gh/seqan/raptor/branch/master/graph/badge.svg?token=SJVMYRUKW2
6+
[4]: https://codecov.io/gh/seqan/raptor
7+
[5]: https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat
8+
[6]: https://bioconda.github.io/recipes/raptor/README.html
9+
210
### A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences
311

412
## Download and Installation
5-
There may be performance benefits when compiling from source, especially when using `-march=native` as compiler
6-
directive.
13+
There may be performance benefits when compiling from source as the build can be optimized for the host system.
714

815
### Install with bioconda (Linux)
9-
[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/raptor/README.html)
1016

1117
```bash
1218
conda install -c bioconda -c conda-forge raptor
@@ -62,16 +68,20 @@ make
6268

6369
The binary can be found in `bin`.
6470

65-
You may want to add the raptor executable yo your PATH:
71+
You may want to add the Raptor executable to your PATH:
6672
```
6773
export PATH=$(pwd)/bin:$PATH
6874
raptor --version
6975
```
7076

77+
By default, Raptor will be built with host-specific optimizations (`-march=native`). This behavior can be disabled by
78+
passing `-DRAPTOR_NATIVE_BUILD=OFF` to CMake.
79+
7180
</details>
7281

7382
## Example Data and Usage
74-
A toy data set can be found [here](https://ftp.imp.fu-berlin.de/pub/seiler/raptor/).
83+
A toy data set (124 MiB compressed, 983 MiB decompressed) can be found
84+
[here](https://ftp.imp.fu-berlin.de/pub/seiler/raptor/).
7585

7686
```bash
7787
wget https://ftp.imp.fu-berlin.de/pub/seiler/raptor/example_data.tar.gz
@@ -113,7 +123,7 @@ Afterwards, we can search for some reads:
113123
raptor search --error 2 --index raptor.index --query example_data/64/reads/mini.fastq --output search.output
114124
```
115125

116-
The output starts with a header section (lines starting with `\#`). The header maps a number to each input file.
126+
The output starts with a header section (lines starting with `#`). The header maps a number to each input file.
117127
After the header section, each line of the output consists of the read ID (in the toy example these are numbers) and
118128
the corresponding bins in which they were found:
119129
```text
@@ -169,6 +179,18 @@ The preprocessing applies the same cutoffs as used in Mantis
169179
This means that only minimisers that occur more often than the cutoff specifies are included in the output.
170180
If you wish to process all minimisers, you can use `--disable-cutoffs`.
171181

182+
### Partitioned indices
183+
To reduce the overall memory consumption, the index can be divided into multiple (a power of two) parts.
184+
This can be done by passing `--parts n` to `raptor build`, where `n` is the number of parts you want to create.
185+
This will create `n` files, each representing one part of the index. The `--size` parameter describes the overall size
186+
of the index. For example, `--size 8g --parts 4` will create four 2 GiB indices. This will reduce the memory consumption
187+
of `raptor build` and `raptor search` by approximately 6 GiB, since there will only be one part in memory at any given
188+
time. `raptor search` will automatically detect the parts, and does not need any special parameters.
189+
190+
### Upgrading the index (v1.1.0 to v2.0.0)
191+
An old index can be upgraded by running `raptor upgrade` and providing some information about how the index was
192+
constructed.
193+
172194
### SOCKS interface
173195
We implement the core interface of [SOCKS](https://gitlab.ub.uni-bielefeld.de/gi/socks).
174196
For a list of options, see the help pages:

src/argument_parsing/build.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void init_build_parser(seqan3::argument_parser & parser, build_arguments & argum
4444
'\0',
4545
"shape",
4646
"The shape to use for k-mers. Mutually exclusive with --kmer.",
47-
seqan3::option_spec::hidden, // Add help in kmer_size
47+
seqan3::option_spec::advanced, // Add help in kmer_size
4848
seqan3::regex_validator{"[01]+"});
4949
parser.add_option(arguments.out_path,
5050
'\0',
@@ -73,6 +73,11 @@ void init_build_parser(seqan3::argument_parser & parser, build_arguments & argum
7373
"compute-minimiser",
7474
"Computes minimisers using cutoffs from Mantis (Pandey et al.). Does not create the index.",
7575
arguments.is_socks ? seqan3::option_spec::hidden : seqan3::option_spec::standard);
76+
parser.add_flag(arguments.compute_minimiser,
77+
'\0',
78+
"compute-minimizer",
79+
"Hidden flag, alias of --compute-minimiser.",
80+
seqan3::option_spec::hidden);
7681
parser.add_flag(arguments.disable_cutoffs,
7782
'\0',
7883
"disable-cutoffs",
@@ -119,8 +124,13 @@ void run_build(seqan3::argument_parser & parser, bool const is_socks)
119124
arguments.window_size = arguments.shape.size();
120125
}
121126

122-
std::filesystem::path output_directory = parser.is_option_set("compute-minimiser") ? arguments.out_path :
123-
arguments.out_path.parent_path();
127+
bool const is_compute_minimiser_set{parser.is_option_set("compute-minimiser") ||
128+
parser.is_option_set("compute-minimizer")};
129+
130+
arguments.compute_minimiser = is_compute_minimiser_set;
131+
132+
std::filesystem::path output_directory = is_compute_minimiser_set ? arguments.out_path :
133+
arguments.out_path.parent_path();
124134
std::error_code ec{};
125135
std::filesystem::create_directories(output_directory, ec);
126136

@@ -132,7 +142,7 @@ void run_build(seqan3::argument_parser & parser, bool const is_socks)
132142
ec.message())};
133143
// LCOV_EXCL_END
134144

135-
if (!parser.is_option_set("compute-minimiser"))
145+
if (!is_compute_minimiser_set)
136146
{
137147
seqan3::output_file_validator{}(arguments.out_path);
138148

test/cli/raptor_options_test.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,17 @@ TEST_F(raptor_build, directory_missing)
131131
EXPECT_EQ(result.err, std::string{"[Error] Option --output is required but not set.\n"});
132132
}
133133

134+
TEST_F(raptor_build, alias)
135+
{
136+
cli_test_result const result = execute_app("raptor", "build",
137+
"--size 8m",
138+
"--compute-minimizer",
139+
tmp_bin_list_file.file_path);
140+
EXPECT_NE(result.exit_code, 0);
141+
EXPECT_EQ(result.out, std::string{});
142+
EXPECT_EQ(result.err, std::string{"[Error] Option --output is required but not set.\n"});
143+
}
144+
134145
TEST_F(raptor_build, size_missing)
135146
{
136147
cli_test_result const result = execute_app("raptor", "build",

test/util/collect_compile_stats.sh

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/usr/bin/env bash
2+
set -e
23

34
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
45

@@ -8,14 +9,14 @@ reset_scripts() {
89
}
910
trap reset_scripts EXIT
1011

11-
set -ex
12-
13-
cmake $SCRIPT_DIR/../.. -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER=$SCRIPT_DIR/g++.sh -DCMAKE_C_COMPILER=$SCRIPT_DIR/gcc.sh -DUSE_CCACHE=OFF
12+
# Configure a Debug build that uses the timing wrapper scripts as compilers.
# Paths are quoted so that a checkout location containing spaces or glob characters is passed intact.
cmake "$SCRIPT_DIR/../.." -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_COMPILER="$SCRIPT_DIR/g++.sh" -DCMAKE_C_COMPILER="$SCRIPT_DIR/gcc.sh" -DRAPTOR_USE_CCACHE=OFF -DRAPTOR_NATIVE_BUILD=ON
1413

1514
# Switch the gcc wrapper to timing mode (DO_TIME=1); the path is quoted to survive spaces in SCRIPT_DIR.
sed -i "s/DO_TIME=0/DO_TIME=1/" "$SCRIPT_DIR/gcc.sh"
1615
# Switch the g++ wrapper to timing mode (DO_TIME=1); the path is quoted to survive spaces in SCRIPT_DIR.
sed -i "s/DO_TIME=0/DO_TIME=1/" "$SCRIPT_DIR/g++.sh"
1716

18-
make -k -j4 cli_test api_test
17+
make -k -j6 cli_test api_test
1918

2019
find . -name "ram_usage.*" -exec cat {} + > complete.txt
2120
# Convert the concatenated ram_usage records into stats.csv; the script path is quoted to survive spaces in SCRIPT_DIR.
"$SCRIPT_DIR/parse.py" complete.txt stats.csv
21+
22+
echo "Results can be found in $(pwd)/stats.csv"

0 commit comments

Comments
 (0)