Skip to content

Commit ee331f5

Browse files
committed
v2
1 parent 4564f15 commit ee331f5

File tree

5 files changed

+362
-103
lines changed

5 files changed

+362
-103
lines changed

include/misc/charconv

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
// -*- C++ -*-
2+
// SPDX-FileCopyrightText: 2006-2026, Knut Reinert & Freie Universität Berlin
3+
// SPDX-FileCopyrightText: 2016-2026, Knut Reinert & MPI für molekulare Genetik
4+
// SPDX-License-Identifier: BSD-3-Clause
5+
6+
/*!\file
7+
* \brief The [\<charconv\> header](https://en.cppreference.com/w/cpp/header/charconv) from C++17's standard library.
8+
* \author Svenja Mehringer <svenja.mehringer AT fu-berlin.de>
9+
*/
10+
11+
#ifndef SEQAN_STD_CHARCONV_SHIM // to avoid multiple definitions if other seqan modules also implement this
12+
#define SEQAN_STD_CHARCONV_SHIM
13+
14+
#include <charconv>
15+
#include <utility> // __cpp_lib_to_chars may be defined here as currently documented.
16+
#include <version> // From C++20 onwards, all feature macros should be defined here.
17+
18+
/*!\defgroup std std
19+
* \ingroup misc
20+
* \brief The [\<charconv\> header](https://en.cppreference.com/w/cpp/header/charconv) from C++17's standard library.
21+
* \details
22+
*
23+
* The following table describes what implementation of std::to_chars and std::from_chars will be used
24+
*
25+
* | stdlib version | __cpp_lib_to_chars | chars_format | to_chars_result | from_chars_result | to_chars (int) | from_chars (int) | to_chars (float) | from_chars (float) |
26+
* | -------------- | ------------------------------------------------ | -------------- | --------------- | ----------------- | -------------- | ---------------- | -------------------- | -------------------- |
27+
* | gcc 10 | undefined and `<charconv>` header | stdlib | stdlib | stdlib | stdlib | stdlib | shim (ostringstream) | shim (strto[f/d/ld]) |
28+
* | gcc 11 | undefined (or 201611) and `<charconv>` header | stdlib | stdlib | stdlib | stdlib | stdlib | stdlib | stdlib |
29+
*
30+
* Note: gcc 11 implements float too, but does not define __cpp_lib_to_chars
31+
*/
32+
33+
// =========================================================================
34+
// If float implementation is missing, add our own shim-implementation
35+
// =========================================================================
36+
37+
#if __cpp_lib_to_chars < 201611
38+
# include <cassert>
# include <cerrno>
# include <cstddef>
# include <cstdlib>
# include <locale>
39+
# include <sstream>
40+
# include <type_traits>
41+
42+
namespace sharg::contrib::charconv_float
43+
{
44+
using ::std::chars_format;
45+
using ::std::from_chars_result;
46+
using ::std::to_chars_result;
47+
48+
/*!\brief std::to_chars implementation for floating point via a std::stringstream for default base = 10.
49+
* \ingroup std
50+
*/
51+
template <typename value_type>
52+
requires std::is_floating_point_v<value_type>
53+
inline to_chars_result to_chars_floating_point(char * first, char * last, value_type value) noexcept
54+
{
55+
assert(first != nullptr);
56+
assert(last != nullptr);
57+
58+
std::ostringstream ss;
59+
ss << value;
60+
auto str = ss.str();
61+
62+
if (last - first < static_cast<std::ptrdiff_t>(str.size()))
63+
return {last, std::errc::value_too_large};
64+
65+
std::copy(str.begin(), str.end(), first);
66+
67+
return {first + str.size(), std::errc{}};
68+
}
69+
70+
/*!\brief Delegates to functions strto[d/f/ld] for floating point value extraction.
71+
* \ingroup std
72+
*/
73+
template <typename value_type>
74+
requires std::is_floating_point_v<value_type>
75+
inline from_chars_result from_chars_floating_point(char const * first,
76+
char const * last,
77+
value_type & value,
78+
chars_format fmt = chars_format::general) noexcept
79+
{
80+
// The locale issue:
81+
// std::from_chars is documented to be locale independent. The accepted patterns
82+
// are identical to the one used by strtod in the defailt ("C") locale.
83+
//
84+
// The functions strto[d/f/ld] used here are locale dependent but
85+
// setting the locale manually by std::setlocale is not thread safe.
86+
// So for the time being this workaround is locale dependent.
87+
if (*first == '+') // + is permitted in function strto[d/f/ld] but not in from_chars
88+
return {last, std::errc::invalid_argument};
89+
90+
value_type tmp{};
91+
constexpr ptrdiff_t buffer_size = 100;
92+
char buffer[buffer_size];
93+
94+
if (fmt != chars_format::general)
95+
{
96+
bool exponent_is_present{false};
97+
for (auto it = first; it != last; ++it)
98+
{
99+
if (*it == 'e' || *it == 'E')
100+
{
101+
exponent_is_present = true;
102+
break;
103+
}
104+
}
105+
106+
if (fmt == chars_format::scientific && !exponent_is_present)
107+
return {last, std::errc::invalid_argument};
108+
109+
if (fmt == chars_format::fixed && exponent_is_present)
110+
return {last, std::errc::invalid_argument};
111+
}
112+
113+
// In contrast to std::from_chars, std::strto[f/d/ld] does not treat the second
114+
// parameter (str_end) as "end of the sequence to parse" but merely as an out
115+
// parameter to indicate where the parsing ended. Therefore, if [last] does
116+
// not point to the end of a null-terminated string, a buffer is needed to
117+
// represent the truncated sequence and ensure correct from_chars functionality.
118+
char * start;
119+
120+
if ((*last != '\0') || fmt == chars_format::hex)
121+
{
122+
// If hex format is explicitly expected, the 0x prefix is not allowed in the
123+
// the original sequence according to the std::from_chars cppreference
124+
// documentation.
125+
// In order to use strto[f/d/ld], the prefix must be prepended to achieve
126+
// correct parsing. This will also automatically lead to an error if the
127+
// original sequence did contain a 0x prefix and thus reflect the correct
128+
// requirements of std::from_chars.
129+
ptrdiff_t offset{0};
130+
if (fmt == chars_format::hex)
131+
{
132+
buffer[0] = '0';
133+
buffer[1] = 'x';
134+
offset = 2;
135+
}
136+
137+
std::copy(first, last, &buffer[offset]);
138+
buffer[std::min<ptrdiff_t>(buffer_size - offset, last - first)] = '\0';
139+
140+
start = &buffer[0];
141+
}
142+
else
143+
{
144+
start = const_cast<char *>(first);
145+
}
146+
147+
char * end;
148+
149+
if constexpr (std::is_same_v<std::remove_reference_t<value_type>, float>)
150+
{
151+
tmp = strtof(start, &end);
152+
}
153+
if constexpr (std::is_same_v<std::remove_reference_t<value_type>, double>)
154+
{
155+
tmp = strtod(start, &end);
156+
}
157+
if constexpr (std::is_same_v<std::remove_reference_t<value_type>, long double>)
158+
{
159+
tmp = strtold(start, &end);
160+
}
161+
162+
last = first + (end - start);
163+
164+
if (errno == ERANGE)
165+
{
166+
return {last, std::errc::result_out_of_range};
167+
}
168+
else if (tmp == 0 && end == start)
169+
{
170+
return {last, std::errc::invalid_argument};
171+
}
172+
173+
// Success.
174+
value = tmp;
175+
return {last, {}};
176+
}
177+
178+
} // namespace sharg::contrib::charconv_float
179+
180+
namespace sharg::contrib::charconv_float
181+
{
182+
// -----------------------------------------------------------------------------
183+
// to_chars for floating point types
184+
// -----------------------------------------------------------------------------
185+
186+
/*!\brief std::to_chars overload for floating point via a std::stringstream for default base = 10.
187+
* \ingroup std
188+
*/
189+
template <typename floating_point_type>
190+
requires std::is_floating_point_v<floating_point_type>
191+
inline to_chars_result to_chars(char * first, char * last, floating_point_type value) noexcept
192+
{
193+
return to_chars_floating_point(first, last, value);
194+
}
195+
196+
// -----------------------------------------------------------------------------
197+
// from_chars for floating point types
198+
// -----------------------------------------------------------------------------
199+
200+
/*!\brief Parse a char sequence into a floating point value.
201+
* \ingroup std
202+
* \tparam floating_point_type The type to parse the string into; Must model std::is_floating_point_v.
203+
* \param[in] first The start of the string to parse.
204+
* \param[in] last The end of the string to parse.
205+
* \param[in, out] value The value to store the parsed result in.
206+
* \param[in] fmt The std::chars_format that alters the behaviour of parsing.
207+
* \returns A std::from_chars_result. See the "Return value" section below for more information.
208+
*
209+
* \details
210+
*
211+
* Analyzes the character sequence [first,last) for a pattern described below.
212+
* If no characters match the pattern or if the value obtained by parsing the
213+
* matched characters is not representable in the type of value, value is
214+
* unmodified, otherwise the characters matching the pattern are interpreted as
215+
* a text representation of an arithmetic value, which is stored in value.
216+
*
217+
* Floating-point parsers: Expects the pattern identical to the one used by
218+
* std::strtod in the default ("C") locale, except that:
219+
*
220+
* - the plus sign is not recognized outside of the exponent (only the minus
221+
* sign is permitted at the beginning)
222+
* - if fmt has std::chars_format::scientific set but not std::chars_format::fixed,
223+
* the exponent part is required (otherwise it is optional)
224+
* - if fmt has std::chars_format::fixed set but not std::chars_format::scientific,
225+
* the optional exponent is not permitted
226+
* - if fmt is std::chars_format::hex, the prefix "0x" or "0X" is not permitted
227+
* (the string "0x123" parses as the value "0" with unparsed remainder "x123").
228+
*
229+
* \attention This implementation is a workaround until the function is supported
230+
* by the compiler. It falls back to use the functions strto[d/f/ld]
231+
* before checking the above limitations
232+
*
233+
* ### Return value
234+
* This function is a workaround until the function is supported
235+
* by the compiler. It falls back to use the functions strto[d/f/ld] so the
236+
* return value is NOT as documented here https://en.cppreference.com/w/cpp/utility/from_chars
237+
* but:
238+
*
239+
* On success, std::from_chars_result::ec is value-initialized. On error,
240+
* std::from_chars_result::ec is either an
241+
* std::errc::invalid_argument if an illegal character or format has been
242+
* encountered, or std::errc::result_out_of_range if parsing the value would cause an
243+
* overflow. The std::from_chars_result::ptr value is always set to last.
244+
*
245+
* ### The locale issue
246+
* std::from_chars is documented to be locale independent. The accepted patterns
247+
* are identical to the one used by strtod in the default ("C") locale.
248+
*
249+
* The functions strto[d/f/ld] used here are locale dependent but
250+
* setting the locale manually by std::setlocale is not thread safe.
251+
* So for the time being this workaround is locale dependent.
252+
*
253+
* \sa https://en.cppreference.com/w/cpp/utility/from_chars
254+
*/
255+
template <typename floating_point_type>
256+
requires std::is_floating_point_v<floating_point_type>
257+
inline from_chars_result from_chars(char const * first,
258+
char const * last,
259+
floating_point_type & value,
260+
chars_format fmt = chars_format::general) noexcept
261+
{
262+
return from_chars_floating_point(first, last, value, fmt);
263+
}
264+
} // namespace sharg::contrib::charconv_float
265+
266+
// Makes the shim overloads reachable as std::from_chars / std::to_chars.
// NOTE(review): the C++ standard does not generally permit adding declarations to namespace std; confirm that
// this is accepted for the supported standard library versions.
namespace std
267+
{
268+
// gcc-11 also defines float versions, but they don't clash with ours, because they use explicit overloads for each
269+
// float type. That means the stdlib has a higher priority in overload resolution than our shim implementation.
270+
using ::sharg::contrib::charconv_float::from_chars; // import our shim-float version
271+
using ::sharg::contrib::charconv_float::to_chars; // import our shim-float version
272+
} // namespace std
273+
274+
#endif // __cpp_lib_to_chars < 201611
275+
276+
#endif // SEQAN_STD_CHARCONV_SHIM

include/misc/needle_matrix.hpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44

55
#pragma once
66

7-
#include <array>
87
#include <cstddef>
9-
#include <mdspan>
8+
#include <ranges>
109
#include <span>
1110
#include <vector>
1211

@@ -25,6 +24,12 @@ class needle_matrix
2524
needle_matrix & operator=(needle_matrix &&) = default;
2625
~needle_matrix() = default;
2726

27+
// Constructs a matrix that takes ownership of an existing flat buffer (`data` is moved into data_).
// `levels` and `experiments` are stored as given; the two-index operator[] addresses entry (lvl, exp) at
// flat position lvl * experiments() + exp.
// NOTE(review): nothing here checks that data.size() == levels * experiments — confirm callers guarantee it.
needle_matrix(std::vector<value_t> data, size_t levels, size_t experiments) :
28+
data_(std::move(data)),
29+
levels_(levels),
30+
experiments_(experiments)
31+
{}
32+
2833
needle_matrix(size_t levels, size_t experiments) :
2934
data_(levels * experiments),
3035
levels_(levels),
@@ -41,7 +46,7 @@ class needle_matrix
4146
template <typename self_t>
4247
[[nodiscard]] constexpr auto && operator[](this self_t && self, size_t lvl, size_t exp) noexcept
4348
{
44-
return self.view()[lvl, exp];
49+
return self[lvl * self.experiments() + exp];
4550
}
4651

4752
// Flat 1D access (for bin-wise operations)
@@ -51,26 +56,28 @@ class needle_matrix
5156
return self.data()[bin];
5257
}
5358

54-
template <typename self_t>
55-
[[nodiscard]] constexpr auto view(this self_t && self) noexcept
56-
{
57-
return std::mdspan(self.data(), self.levels(), self.experiments());
58-
}
59-
6059
// Returns a contiguous span for a single level (row)
6160
template <typename self_t>
6261
[[nodiscard]] constexpr auto level(this self_t && self, size_t lvl) noexcept
6362
{
6463
return std::span(self.data() + (lvl * self.experiments()), self.experiments());
6564
}
6665

67-
// Returns a strided mdspan for a single experiment (column)
66+
// Returns a transform view for a single experiment (column)
6867
template <typename self_t>
6968
[[nodiscard]] constexpr auto experiment(this self_t && self, size_t exp) noexcept
7069
{
71-
return std::mdspan(self.data() + exp,
72-
std::layout_stride::mapping{std::extents<size_t, std::dynamic_extent>{self.levels()},
73-
std::array<size_t, 1>{self.experiments()}});
70+
return std::views::iota(size_t{0}, self.levels())
71+
| std::views::transform(
72+
[&self, exp](size_t lvl) -> auto &
73+
{
74+
return self[lvl, exp];
75+
});
76+
}
77+
78+
constexpr void zero() noexcept
79+
{
80+
std::ranges::fill_n(data(), size(), value_t{});
7481
}
7582

7683
[[nodiscard]] constexpr size_t levels() const noexcept
@@ -85,11 +92,4 @@ class needle_matrix
8592
{
8693
return data_.size();
8794
}
88-
89-
// Invalidates all views (.view(), .experiment(), .level())!
90-
void add_level()
91-
{
92-
++levels_;
93-
data_.resize(levels() * experiments());
94-
}
9595
};

src/estimate.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,8 @@ void estimate(estimate_ibf_arguments & args,
231231
{
232232
ids.clear();
233233
seqs.clear();
234-
std::ranges::fill_n(prev_counts.data(), prev_counts.size(), float{});
235-
std::ranges::fill_n(estimations.data(), estimations.size(), uint16_t{});
234+
prev_counts.zero();
235+
estimations.zero();
236236
};
237237

238238
auto process_ibf = [&](size_t const i)

src/ibf.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ void ibf_helper(std::vector<std::filesystem::path> const & minimiser_files,
383383
cutoffs[i],
384384
expression_by_genome);
385385
auto experiment = expressions.experiment(i);
386-
for (size_t j = 0; j < experiment.extent(0); ++j)
386+
for (size_t j = 0; j < experiment.size(); ++j)
387387
experiment[j] = expression_thresholds[j];
388388
}
389389

0 commit comments

Comments
 (0)