Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/global_rng.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,11 +176,21 @@ pub fn f32() -> f32 {
with_rng(|r| r.f32())
}

/// Generates a random `f32` in range `0..=1`.
pub fn f32_inclusive() -> f32 {
    // Delegate to the thread-local generator's inclusive-range method.
    with_rng(|rng| rng.f32_inclusive())
}

/// Generates a random `f64` in range `0..1`.
pub fn f64() -> f64 {
    // Delegate to the thread-local generator's half-open-range method.
    with_rng(|rng| rng.f64())
}

/// Generates a random `f64` in range `0..=1`.
pub fn f64_inclusive() -> f64 {
    // Delegate to the thread-local generator's inclusive-range method.
    with_rng(|rng| rng.f64_inclusive())
}

/// Collects `amount` values at random from the iterable into a vector.
pub fn choose_multiple<I: IntoIterator>(source: I, amount: usize) -> Vec<I::Item> {
with_rng(|rng| rng.choose_multiple(source, amount))
Expand Down
61 changes: 55 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,18 +364,67 @@ impl Rng {
}
}

/// Generates a random `f32` in range `0..=1`.
#[inline]
pub fn f32_inclusive(&mut self) -> f32 {
// Generate a number in 0..2^63 then convert to f32 and multiply by 2^(-63).
//
// Even though we're returning f32, we still generate u64 internally to make
// it possible to return nonzero numbers as small as 2^(-63). If we only
// generated u32 internally, the smallest nonzero number we could return
// would be 2^(-32).
//
// The integer we generate is in 0..2^63 rather than 0..2^64 to improve speed
// on x86-64, which has efficient i64->float conversion (cvtsi2ss) but for
// which u64->float conversion must be implemented in software.
//
// There is still some remaining bias in the int-to-float conversion, because
// nonzero numbers <=2^(-64) are never generated, even though they are
// expressible in f32. However, at this point the bias in int-to-float conversion
// is no larger than the bias in the underlying WyRand generator: since it only
// has a 64-bit state, it necessarily already has biases of at least 2^(-64)
// probability.
//
// See e.g. Section 3.1 of Thomas, David B., et al., "Gaussian random number
// generators", https://www.doc.ic.ac.uk/~wl/papers/07/csur07dt.pdf, for background.
const MUL: f32 = 1.0 / (1u64 << 63) as f32;
(self.gen_u64() >> 1) as f32 * MUL
}

/// Generates a random `f32` in range `0..1`.
///
/// Function `f32_inclusive()` is a little simpler and faster, so default
/// to that if inclusive range is acceptable.
#[inline]
pub fn f32(&mut self) -> f32 {
let b = 32;
let f = core::f32::MANTISSA_DIGITS - 1;
f32::from_bits((1 << (b - 2)) - (1 << f) + (self.u32(..) >> (b - f))) - 1.0
loop {
let x = self.f32_inclusive();
if x < 1.0 {
return x;
}
}
}
Comment on lines +400 to +406
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was the perf of this measured? Seems like adding a branch and a loop to f32 might have performance implications I'm not comfortable with releasing.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't measure at the time since there was no existing benchmark; I only looked at assembly.

I've now added a benchmark in #112 and measured the results on AArch64 (M2 Pro) and x86-64 (Zen4). I get:

AArch64:
0.449ns/iter: old f32().   (2, 3, or 4 iters unrolled)
0.344ns/iter: new f32_inclusive().  (4 iters unrolled)
0.587ns/iter: new f32().  (2 iters unrolled)

x86-64:
0.856ns/iter: old f32().  (3 iterations)
0.649ns/iter: new f32_inclusive().
0.772ns/iter: new f32().  (2 iterations unrolled)

So the change is always a win if you switch to f32_inclusive(), and it's a win on x86-64 (but a loss on AArch64) if you stay with f32().

Note that the branch is almost-always taken (probability 1 - 2^-23) so the branch predictor will predict this ~perfectly.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(These numbers are the benchmark numbers, divided by 10_000, since the benchmark itself generates 10_000 f32s.)


/// Generates a random `f64` in range `0..=1`.
#[inline]
pub fn f64_inclusive(&mut self) -> f64 {
// Draw 63 uniform random bits, then scale into [0, 1] by multiplying with
// 2^(-63). See the comment in f32_inclusive() for more details.
let bits = self.gen_u64() >> 1;
bits as f64 * (1.0 / (1u64 << 63) as f64)
}

/// Generates a random `f64` in range `0..1`.
///
/// Function `f64_inclusive()` is a little simpler and faster, so default
/// to that if inclusive range is acceptable.
#[inline]
pub fn f64(&mut self) -> f64 {
let b = 64;
let f = core::f64::MANTISSA_DIGITS - 1;
f64::from_bits((1 << (b - 2)) - (1 << f) + (self.u64(..) >> (b - f))) - 1.0
loop {
let x = self.f64_inclusive();
if x < 1.0 {
return x;
}
}
}

/// Collects `amount` values at random from the iterable into a vector.
Expand Down
93 changes: 53 additions & 40 deletions tests/smoke.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,59 +77,72 @@ fn u128() {

#[test]
fn f32() {
// Every sample from the half-open generator must lie in [0, 1).
for _ in 0..1000 {
let result = fastrand::f32();
assert!((0.0..1.0).contains(&result));
}
}

#[test]
fn f64() {
// Every sample from the half-open generator must lie in [0, 1).
for _ in 0..1000 {
let result = fastrand::f64();
assert!((0.0..1.0).contains(&result));
}
}

#[test]
fn digit() {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I accidentally removed several unrelated tests when resolving conflicts. Filed #110 to re-add them.

for base in 1..36 {
let result = fastrand::digit(base);
assert!(result.is_ascii_digit() || result.is_ascii_lowercase());
}
}

#[test]
fn global_rng_choice() {
let items = [1, 4, 9, 5, 2, 3, 6, 7, 8, 0];

for item in &items {
while fastrand::choice(&items).unwrap() != item {}
let mut r = fastrand::Rng::with_seed(0);
let tiny = (-24.0f32).exp2();
let mut count_tiny_nonzero = 0;
let mut count_top_half = 0;
for _ in 0..100_000_000 {
let x = r.f32();
assert!((0.0..1.0).contains(&x));
if x > 0.0 && x < tiny {
count_tiny_nonzero += 1;
} else if x > 0.5 {
count_top_half += 1;
}
}
assert!(count_top_half >= 49_000_000);
assert!(count_tiny_nonzero > 0);
}

#[test]
fn global_rng_alphabetic() {
for _ in 0..1000 {
let result = fastrand::alphabetic();
assert!(result.is_ascii_alphabetic())
fn f32_inclusive() {
// Statistical smoke test over a fixed seed: samples must lie in [0, 1],
// roughly half must fall in the top half, the endpoint 1.0 must be
// reachable, and tiny nonzero values below 2^(-24) must also occur
// (they are only producible because 63 bits are drawn internally).
let mut r = fastrand::Rng::with_seed(0);
let tiny = (-24.0f32).exp2();
let mut count_top_half = 0;
let mut count_tiny_nonzero = 0;
let mut count_one = 0;
for _ in 0..100_000_000 {
let x = r.f32_inclusive();
assert!((0.0..=1.0).contains(&x));
if x == 1.0 {
count_one += 1;
} else if x > 0.5 {
count_top_half += 1;
} else if x > 0.0 && x < tiny {
count_tiny_nonzero += 1;
}
}
// Expect ~50M of 100M samples above 0.5; 49M leaves slack for variance.
assert!(count_top_half >= 49_000_000);
assert!(count_one > 0);
assert!(count_tiny_nonzero > 0);
}

#[test]
fn global_rng_lowercase() {
for _ in 0..1000 {
let result = fastrand::lowercase();
assert!(result.is_ascii_lowercase())
fn f64() {
// Statistical smoke test over a fixed seed: samples must lie in [0, 1)
// and roughly half must fall in the top half of the range.
let mut r = fastrand::Rng::with_seed(0);
let mut count_top_half = 0;
for _ in 0..100_000_000 {
let x = r.f64();
assert!((0.0..1.0).contains(&x));
if x > 0.5 {
count_top_half += 1;
}
}
// Expect ~50M of 100M samples above 0.5; 49M leaves slack for variance.
assert!(count_top_half >= 49_000_000);
}

#[test]
fn global_rng_uppercase() {
for _ in 0..1000 {
let result = fastrand::uppercase();
assert!(result.is_ascii_uppercase())
fn f64_inclusive() {
// Statistical smoke test over a fixed seed: samples must lie in the
// closed range [0, 1] and roughly half must fall in the top half.
let mut r = fastrand::Rng::with_seed(0);
let mut count_top_half = 0;
for _ in 0..100_000_000 {
let x = r.f64_inclusive();
assert!((0.0..=1.0).contains(&x));
if x > 0.5 {
count_top_half += 1;
}
}
// Expect ~50M of 100M samples above 0.5; 49M leaves slack for variance.
assert!(count_top_half >= 49_000_000);
}

#[test]
Expand Down