Skip to content

Commit 2e9231a

Browse files
Default seeding of class PRNG (#4720)
This PR changes the default seeding of `class PRNG` to use `std::random_device`. [sc-41075] --- TYPE: NO_HISTORY DESC: Default seeding of `class PRNG`
1 parent a84789e commit 2e9231a

File tree

4 files changed

+173
-27
lines changed

4 files changed

+173
-27
lines changed

tiledb/common/random/prng.cc

Lines changed: 63 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,75 @@
3333
#include "tiledb/common/random/prng.h"
3434

3535
namespace tiledb::common {
36+
/**
37+
* 64-bit mersenne twister engine for random number generation.
38+
*
39+
* This definition is duplicated to avoid having it defined as `public` in
40+
* `class PRNG`.
41+
*/
42+
using prng_type = std::mt19937_64;
43+
44+
/**
45+
* Implementation of the random seed.
46+
*
47+
* This is a class template in order to use `if constexpr`.
48+
*
49+
* @tparam return_size_type The type of the seed to be returned
50+
*/
51+
template <class return_size_type>
52+
return_size_type random_seed() {
53+
static constexpr size_t rng_size = sizeof(std::random_device::result_type);
54+
static constexpr size_t ret_size = sizeof(return_size_type);
55+
std::random_device rng{};
56+
/*
57+
* We will need 64 bits to adequately seed the PRNG (`ret_size`). We support
58+
* cases where the result size of the RNG is 64 or 32 bits (`rng_size`).
59+
*/
60+
if constexpr (ret_size == rng_size) {
61+
return rng();
62+
} else if constexpr (ret_size == 2 * rng_size) {
63+
return (rng() << rng_size) + rng();
64+
} else {
65+
throw std::runtime_error("Unsupported combination of RNG sizes");
66+
}
67+
}
68+
69+
/**
70+
* The PRNG used within the random constructor.
71+
*/
72+
prng_type prng_random() {
73+
return prng_type{random_seed<uint64_t>()}; // RVO
74+
}
75+
76+
/**
77+
* The PRNG used within the default constructor.
78+
*/
79+
prng_type prng_default() {
80+
/*
81+
* Retrieve optional seed, which may or may not have been set explicitly.
82+
*/
83+
auto seed{Seeder::get().seed()};
84+
/*
85+
* Use the seed if it has been set. Otherwise use a random seed.
86+
*/
87+
if (seed.has_value()) {
88+
return prng_type{seed.value()}; // RVO
89+
} else {
90+
return prng_random(); // RVO
91+
}
92+
}
3693

3794
/* ********************************* */
3895
/* CONSTRUCTORS & DESTRUCTORS */
3996
/* ********************************* */
4097

4198
PRNG::PRNG()
42-
: prng_(prng_initial())
99+
: prng_(prng_default())
100+
, mtx_{} {
101+
}
102+
103+
PRNG::PRNG(RandomSeedT)
104+
: prng_(prng_random())
43105
, mtx_{} {
44106
}
45107

@@ -61,16 +123,4 @@ uint64_t PRNG::operator()() {
61123
/* PRIVATE METHODS */
62124
/* ********************************* */
63125

64-
std::mt19937_64 PRNG::prng_initial() {
65-
// Retrieve optional, potentially default-constructed seed.
66-
auto seed{Seeder::get().seed()};
67-
68-
// If the seed has been set, set it on the RNG engine.
69-
if (seed.has_value()) {
70-
return std::mt19937_64{seed.value()}; // RVO
71-
} else {
72-
return {}; // RVO
73-
}
74-
}
75-
76126
} // namespace tiledb::common

tiledb/common/random/prng.h

Lines changed: 89 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,21 +39,106 @@
3939
#include "tiledb/common/random/seeder.h"
4040

4141
namespace tiledb::common {
42+
43+
/**
44+
* Marker class for a test-only PRNG constructor
45+
*/
46+
class RandomSeedT {};
47+
/**
48+
* Marker constant
49+
*/
50+
static constexpr RandomSeedT RandomSeed;
51+
52+
/**
53+
* A random number generator suitable for both production and testing.
54+
*
55+
* @section Requirements
56+
*
57+
* This PRNG must support two very different kinds of situations:
58+
*
59+
* 1. In production use (the ordinary case) the seed must be _actually_ random
60+
* so that the random sequences in different processes are distinct.
61+
* 2. During most testing the seed must be deterministic to ensure that
62+
* different test runs execute the same sequence of operations. This ensures
63+
* that test failures can be replicated for diagnosis and correction.
64+
* a. In particular, the seed in Catch2 test runs should be deterministic.
65+
* 3. Certain tests, however, require actual randomness.
66+
* a. One such test verifies that actual randomness is available per (1). Such
67+
* tests necessarily have the possibility of failures, i.e. of false
68+
* positives, but the actual likelihood can be made extremely low.
69+
* b. Stress tests execute large number of test runs searching for defects.
70+
* Such tests do not generate new information when run with previously-
71+
* used PRNG sequences.
72+
*
73+
* This class satisfies these requirements with the following implementation
74+
* choices:
75+
* 1. If the user has not called `set_seed()` on the global seeder (from
76+
* `Seeder::get`), then the seed is taken from `std::random_device`.
77+
* 2. If the user has called `set_seed()` on the global seeder, that seed is
78+
* used.
79+
* 3. This class uses a global seeder in order to support Catch2. An event
80+
* handler that executes at the start of the test run calls `set_seed()`.
81+
*
82+
* @section Maturity
83+
*
84+
* This class only has a default constructor. It does not have constructors that
85+
* take seeds nor seeders. Such constructors would be useful for replicating
86+
* test runs, but would also be premature at present. There's further test
87+
* infrastructure required to replicate a specific test in isolation. As that
88+
* test infrastructure matures, so also should this class. In the interim, in
89+
* order to replicate a specific test with a specific seed, the function
90+
* `initial_prng()` can be temporarily changed.
91+
*
92+
* This class uses a seeded PRNG to implement the random sequence. The
93+
* requirement is that sequences in different processes be distinct, not that
94+
* they be actually random. A randomly-seeded PRNG satisfies this requirement.
95+
* The motivation for this implementation choice is as follows:
96+
* 1. There is no standard hardware requirement for random number generation.
97+
* While it's generally available, there are unknown variations in
98+
* significant quality parameters such as the rate of random generation,
99+
* duration of an RNG call, and randomness of generation (e.g. n-gram
100+
* entropies).
101+
* 2. In order not to stress a potentially inadequate RNG, we only call it for
102+
* seeding and not for every number.
103+
* 3. Qualifying a potential RNG implementation requires engineering resources
104+
* that have not been committed as yet.
105+
*
106+
* @section Caveat
107+
*
108+
* This class uses `std::random_device` to seed the PRNG if no explicit seed is
109+
* set. The standard library does not require that this class use an actual RNG,
110+
* i.e. RNG from hardware of some kind. Indeed, certain earlier implementations
111+
* did not do so and were deterministic. In order to validate that this device
112+
* is actually random, it's necessary to run a multiprocess test to observe
113+
* initialization in different processes. The test suite does not contain such
114+
* a validation test at present.
115+
*/
42116
class PRNG {
43117
public:
44118
/* ********************************* */
45119
/* CONSTRUCTORS & DESTRUCTORS */
46120
/* ********************************* */
47121

48122
/**
49-
* Constructor.
123+
* Default constructor.
50124
*
51-
* Constructs an mt19937 engine for random number generation.
52-
* If Seeder has been seeded, the seed will be set on the engine.
53-
* Otherwise, it is default-constructed.
125+
* If `Seeder` has been seeded, the seed will be set on the engine. Otherwise,
126+
* the generator is constructed with a random seed.
54127
*/
55128
PRNG();
56129

130+
/**
131+
* Constructor for random seeding.
132+
*
133+
* This constructor makes an object that is always constructed with a random
134+
* seed.
135+
*
136+
* @warning This constructor is only for testing. It must not be used in
137+
* production code, where it would thwart the ability to run tests
138+
* deterministically.
139+
*/
140+
PRNG(RandomSeedT);
141+
57142
/** Copy constructor is deleted. */
58143
PRNG(const PRNG&) = delete;
59144

@@ -89,13 +174,6 @@ class PRNG {
89174

90175
/** Mutex which protects against simultaneous access to operator() body. */
91176
std::mutex mtx_;
92-
93-
/* ********************************* */
94-
/* PRIVATE METHODS */
95-
/* ********************************* */
96-
97-
/** Default-constructs an mt19937 engine and optionally sets the seed. */
98-
std::mt19937_64 prng_initial();
99177
};
100178
} // namespace tiledb::common
101179

tiledb/common/random/seeder.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,10 @@ namespace tiledb::common {
4848
* default (set_seed) seed is set (seed) seed is used
4949
* but unused
5050
*
51-
* Note that each transition may occur only once.
52-
* i.e. A seed may only be set one time and may only be used one time.
51+
* Note that each transition may occur only once, i.e. a seed may only be set
52+
* one time and may only be used one time. This is an explicit design choice to
53+
* ensure that a singleton PRNG is only initialized once, and to prevent the
54+
* case where a seeming initialization is not the actual initialization.
5355
*/
5456
class Seeder {
5557
public:

tiledb/common/random/test/unit_seedable_global_PRNG.cc

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ TEST_CASE(
9696
"SeedableGlobalPRNG: operator",
9797
"[SeedableGlobalPRNG][operator][multiple]") {
9898
PRNG& prng = PRNG::get();
99+
// Verify that a second call succeeds.
100+
CHECK_NOTHROW(PRNG::get());
99101
auto rand_num1 = prng();
100102
CHECK(rand_num1 != 0);
101103

@@ -112,7 +114,11 @@ TEST_CASE(
112114
TEST_CASE(
113115
"SeedableGlobalPRNG: Seeder singleton, errors",
114116
"[SeedableGlobalPRNG][Seeder][singleton][errors]") {
115-
// Note: these errors will occur because PRNG sets and uses the singleton.
117+
/*
118+
* Retrieve a PRNG object explicitly. This will cause the PRNG to use the
119+
* singleton seeder, after which subsequent calls should fail.
120+
*/
121+
[[maybe_unused]] auto& x{PRNG::get()};
116122
Seeder& seeder_ = Seeder::get();
117123

118124
SECTION("try to set new seed after it's been set") {
@@ -128,6 +134,16 @@ TEST_CASE(
128134
}
129135
}
130136

137+
/*
138+
* Verify that randomly-seeded PRNG return different numbers. This is the best
139+
* we can do within the ordinary way within a single-process test, the only kind
140+
* readily available within Catch2.
141+
*/
142+
TEST_CASE("SeedableGlobalPRNG: Random seeding", "[SeedableGlobalPRNG]") {
143+
PRNG x(RandomSeed), y(RandomSeed);
144+
CHECK(x() != y());
145+
}
146+
131147
TEST_CASE("random_label", "[random_label]") {
132148
auto rand_label1 = random_label();
133149
CHECK(rand_label1.length() == 32);

0 commit comments

Comments
 (0)