Skip to content

Commit 7826ccf

Browse files
committed
[rand_dist] fast_real: drop precision in favor of perf
1 parent aa032cb commit 7826ccf

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

include/itlib/rand_dist.hpp

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
//
2929
// VERSION HISTORY
3030
//
31-
// 0.01 (2025-10-30) Initial release
31+
// 1.00 (2025-10-31) Initial release
3232
//
3333
//
3434
// DOCUMENTATION
@@ -281,15 +281,22 @@ struct fast_uniform_real_distribution {
281281
impl::do_rng_checks(rng);
282282

283283
// (1 << d) - 1 is more readable, but might overflow
284-
ITLIB_RAND_DIST_CONSTEXPR uint64_t max_int = ~uint64_t(0) >> (64 - std::numeric_limits<F>::digits);
284+
constexpr uint64_t max_int = ~uint64_t(0) >> (64 - std::numeric_limits<F>::digits);
285285

286286
using r_t = typename R::result_type;
287287
ITLIB_RAND_DIST_CONSTEXPR uint64_t rng_range = R::max() - R::min();
288288
if ITLIB_RAND_DIST_CONSTEXPR(rng_range >= max_int) {
289289
// rng_range is enough to saturate our float precision
290290
// we slice off the needed bits (and hope that rng is uniform over all bits)
291291
const auto random_value = r_t(rng() - R::min()) & r_t(max_int);
292-
return std::ldexp(F(random_value), -std::numeric_limits<F>::digits);
292+
293+
// ideally we would use this here:
294+
// return std::ldexp(F(random_value), -std::numeric_limits<F>::digits);
295+
// ... but in reality no compiler would would optimize it well enough
296+
// every single one does a `call ldexp[f]`!
297+
// at best they would do value * exp2[f](-digits) which is literally the same as below
298+
// well, not literally, as exp2 only gets an overload with C++23, but still...
299+
return F(random_value) / F(max_int + 1);
293300
}
294301
else {
295302
// "stretching" to rng_range
@@ -300,12 +307,19 @@ struct fast_uniform_real_distribution {
300307

301308
template <typename R>
302309
constexpr static F draw(F min, F max, R& rng) {
303-
return std::fma(max - min, draw_01(rng), min);
310+
// see comment below about fma
311+
//return std::fma(max - min, draw_01(rng), min);
312+
return min + (max - min) * draw_01(rng);
304313
}
305314

306315
template <typename R>
307316
constexpr F operator()(R& rng) const {
308-
return std::fma(m_scale, draw_01(rng), m_min);
317+
// in the interest of precision we should use fma here:
318+
// return std::fma(m_scale, draw_01(rng), m_min);
319+
// unfortunately it's slower than simple multiply+add on most hardware
320+
// std::uniform_real_distribution uses multiply+add as well
321+
// we want to be fast, so we do it as well
322+
return m_min + m_scale * draw_01(rng);
309323
}
310324

311325
private:

0 commit comments

Comments
 (0)