Skip to content

Commit 6190169

Browse files
authored
feat(perf): use SplitMix64 instead of MT19937 for entropy (#181)
1 parent f176cb9 commit 6190169

File tree

4 files changed

+67
-50
lines changed

4 files changed

+67
-50
lines changed

src/ulid_transform/_ulid_impl.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ py_ulid_hex(PyObject* module, PyObject* Py_UNUSED(ignored))
3434
{
3535
ulid::ULID ulid;
3636
ulid::EncodeTimeSystemClockNow(ulid);
37-
ulid::EncodeEntropyMt19937Fast(ulid);
37+
ulid::EncodeEntropyFast(ulid);
3838
uint8_t buf[ULID_BYTES_LEN];
3939
ulid::MarshalBinaryTo(ulid, buf);
4040
char hex[ULID_HEX_LEN];
@@ -48,7 +48,7 @@ py_ulid_now_bytes(PyObject* module, PyObject* Py_UNUSED(ignored))
4848
{
4949
ulid::ULID ulid;
5050
ulid::EncodeTimeSystemClockNow(ulid);
51-
ulid::EncodeEntropyMt19937Fast(ulid);
51+
ulid::EncodeEntropyFast(ulid);
5252
uint8_t buf[ULID_BYTES_LEN];
5353
ulid::MarshalBinaryTo(ulid, buf);
5454
return PyBytes_FromStringAndSize((const char*)buf, ULID_BYTES_LEN);
@@ -63,7 +63,7 @@ py_ulid_at_time_bytes(PyObject* module, PyObject* arg)
6363
return NULL;
6464
ulid::ULID ulid;
6565
ulid::EncodeTimestamp(static_cast<int64_t>(ts * 1000), ulid);
66-
ulid::EncodeEntropyMt19937Fast(ulid);
66+
ulid::EncodeEntropyFast(ulid);
6767
uint8_t buf[ULID_BYTES_LEN];
6868
ulid::MarshalBinaryTo(ulid, buf);
6969
return PyBytes_FromStringAndSize((const char*)buf, ULID_BYTES_LEN);
@@ -75,7 +75,7 @@ py_ulid_now(PyObject* module, PyObject* Py_UNUSED(ignored))
7575
{
7676
ulid::ULID ulid;
7777
ulid::EncodeTimeSystemClockNow(ulid);
78-
ulid::EncodeEntropyMt19937Fast(ulid);
78+
ulid::EncodeEntropyFast(ulid);
7979
char buf[ULID_TEXT_LEN];
8080
ulid::MarshalTo(ulid, buf);
8181
return PyUnicode_DecodeASCII(buf, ULID_TEXT_LEN, NULL);
@@ -90,7 +90,7 @@ py_ulid_at_time(PyObject* module, PyObject* arg)
9090
return NULL;
9191
ulid::ULID ulid;
9292
ulid::EncodeTimestamp(static_cast<int64_t>(ts * 1000), ulid);
93-
ulid::EncodeEntropyMt19937Fast(ulid);
93+
ulid::EncodeEntropyFast(ulid);
9494
char buf[ULID_TEXT_LEN];
9595
ulid::MarshalTo(ulid, buf);
9696
return PyUnicode_DecodeASCII(buf, ULID_TEXT_LEN, NULL);

src/ulid_transform/splitmix64.hh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#ifndef ULID_SPLITMIX64_HH
2+
#define ULID_SPLITMIX64_HH
3+
4+
#include <cstdint>
5+
6+
namespace ulid {
7+
8+
/**
 * SplitMix64 is a fast, small-state PRNG suitable for non-cryptographic use.
 *
 * The whole generator state is a single 64-bit word. Every call advances the
 * state by a fixed odd increment and returns a bit-mixed copy of the new
 * state, so the output sequence is fully determined by the seed.
 *
 * The type also exposes result_type, min() and max() so that it can be
 * passed to standard <random> distributions and algorithms.
 */
struct SplitMix64 {
    /** Type of the values produced by operator(). */
    using result_type = uint64_t;

    /** Current generator state; advanced on every call to operator(). */
    uint64_t state;

    /**
     * Construct a generator whose first output is derived from
     * seed + 0x9e3779b97f4a7c15.
     */
    explicit SplitMix64(uint64_t seed) noexcept
        : state(seed)
    {
    }

    // The names are parenthesized so that function-like min/max macros
    // (e.g. from <windows.h> without NOMINMAX) cannot expand here.

    /** Smallest value operator() can return. */
    static constexpr result_type(min)() noexcept
    {
        return 0;
    }

    /** Largest value operator() can return (all 64 bits set). */
    static constexpr result_type(max)() noexcept
    {
        return ~static_cast<result_type>(0);
    }

    /**
     * Advance the state and return the next 64-bit value.
     * Unsigned arithmetic wraps modulo 2^64 by definition, so no step can
     * overflow into undefined behaviour.
     */
    result_type operator()() noexcept
    {
        // Step the state by the fixed increment, then scramble a copy of it
        // with two xor-shift/multiply rounds and a final xor-shift.
        uint64_t z = (state += 0x9e3779b97f4a7c15ULL);
        z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
        z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
        return z ^ (z >> 31);
    }
};
27+
28+
}
29+
30+
#endif // ULID_SPLITMIX64_HH

src/ulid_transform/ulid_struct.hh

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,9 @@
1010
#include <thread>
1111
#include <vector>
1212

13+
#include "splitmix64.hh"
1314
#include "ulid_base32.hh"
1415

15-
#if _MSC_VER > 0
16-
typedef uint32_t rand_t;
17-
#else
18-
typedef uint8_t rand_t;
19-
#endif
20-
2116
namespace ulid {
2217

2318
/**
@@ -60,31 +55,28 @@ inline void EncodeTimeSystemClockNow(ULID& ulid)
6055
}
6156

6257
/**
63-
* EncodeEntropyMt19937Fast will encode using std::mt19937
64-
* with only 3 generated values.
58+
* EncodeEntropyFast will fill the entropy bytes data[6..15] using SplitMix64
59+
* with only 2 generated values: all 64 bits of the first, the low 16 bits of the second.
6560
* */
66-
inline void EncodeEntropyMt19937Fast(ULID& ulid)
61+
inline void EncodeEntropyFast(ULID& ulid)
6762
{
68-
static thread_local std::mt19937 gen([]() {
63+
static thread_local SplitMix64 gen([]() {
6964
// Use multiple entropy sources for seeding
70-
std::array<uint32_t, 3> seed_data = {
71-
static_cast<uint32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count()),
72-
static_cast<uint32_t>(std::random_device { }()),
73-
static_cast<uint32_t>(std::hash<std::thread::id> { }(std::this_thread::get_id()))
74-
};
75-
std::seed_seq seed_seq(seed_data.begin(), seed_data.end());
76-
return std::mt19937(seed_seq);
65+
uint64_t seed = static_cast<uint64_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
66+
seed ^= static_cast<uint64_t>(std::random_device { }()) << 32; // random_device draw, shifted up by 32 bits
67+
seed ^= static_cast<uint64_t>(std::random_device { }()); // second draw from a fresh random_device, unshifted
68+
return seed;
7769
}());
78-
uint64_t high = (static_cast<uint64_t>(gen()) << 32) | gen();
79-
uint32_t low = gen();
80-
ulid.data[6] = (high >> 40) & 0xFF;
81-
ulid.data[7] = (high >> 32) & 0xFF;
82-
ulid.data[8] = (high >> 24) & 0xFF;
83-
ulid.data[9] = (high >> 16) & 0xFF;
84-
ulid.data[10] = (high >> 8) & 0xFF;
85-
ulid.data[11] = high & 0xFF;
86-
ulid.data[12] = (low >> 24) & 0xFF;
87-
ulid.data[13] = (low >> 16) & 0xFF;
70+
uint64_t high = gen(); // all 64 bits go to data[6..13], most significant byte first
71+
uint64_t low = gen(); // only the low 16 bits are used (data[14..15])
72+
ulid.data[6] = (high >> 56) & 0xFF;
73+
ulid.data[7] = (high >> 48) & 0xFF;
74+
ulid.data[8] = (high >> 40) & 0xFF;
75+
ulid.data[9] = (high >> 32) & 0xFF;
76+
ulid.data[10] = (high >> 24) & 0xFF;
77+
ulid.data[11] = (high >> 16) & 0xFF;
78+
ulid.data[12] = (high >> 8) & 0xFF;
79+
ulid.data[13] = high & 0xFF;
8880
ulid.data[14] = (low >> 8) & 0xFF;
8981
ulid.data[15] = low & 0xFF;
9082
}

src/ulid_transform/ulid_uint128.hh

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,9 @@
99
#include <random>
1010
#include <vector>
1111

12+
#include "splitmix64.hh"
1213
#include "ulid_base32.hh"
1314

14-
#if _MSC_VER > 0
15-
typedef uint32_t rand_t;
16-
#else
17-
typedef uint8_t rand_t;
18-
#endif
19-
2015
namespace ulid {
2116

2217
/**
@@ -75,23 +70,23 @@ inline void EncodeTimeSystemClockNow(ULID& ulid)
7570
}
7671

7772
/**
78-
* EncodeEntropyMt19937Fast will encode using std::mt19937
79-
* with only 3 generated values.
73+
* EncodeEntropyFast will replace the low 80 bits of ulid using SplitMix64
74+
* with only 2 generated values (providing 128 bits, of which 80 are used).
8075
* */
81-
inline void EncodeEntropyMt19937Fast(ULID& ulid)
76+
inline void EncodeEntropyFast(ULID& ulid)
8277
{
83-
static thread_local std::mt19937 gen([]() {
78+
static thread_local SplitMix64 gen([]() {
8479
// Use multiple entropy sources for seeding
85-
std::array<uint32_t, 3> seed_data = {
86-
static_cast<uint32_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count()),
87-
static_cast<uint32_t>(std::random_device { }()),
88-
static_cast<uint32_t>(reinterpret_cast<uintptr_t>(&gen) & 0xFFFFFFFF)
89-
};
90-
std::seed_seq seed_seq(seed_data.begin(), seed_data.end());
91-
return std::mt19937(seed_seq);
80+
uint64_t seed = static_cast<uint64_t>(std::chrono::high_resolution_clock::now().time_since_epoch().count());
81+
seed ^= static_cast<uint64_t>(std::random_device { }()) << 32; // random_device draw, shifted up by 32 bits
82+
seed ^= static_cast<uint64_t>(std::random_device { }()); // second draw from a fresh random_device, unshifted
83+
return seed;
9284
}());
85+
constexpr ULID lower80 = (static_cast<ULID>(1) << 80) - 1; // mask with the low 80 bits set
9386
ulid = (ulid >> 80) << 80; // Clear lower 80 bits
94-
ulid |= (static_cast<ULID>((static_cast<uint64_t>(gen()) << 32) | gen()) << 16) | (gen() & 0xFFFF);
87+
uint64_t first_draw = gen(); // all 64 bits are used, placed in bits 16..79
88+
uint64_t second_draw = gen(); // only the low 16 bits are used, placed in bits 0..15
89+
ulid |= ((static_cast<ULID>(first_draw) << 16) | (second_draw & 0xFFFF)) & lower80; // OR the 80 assembled bits into the cleared low part
9590
}
9691

9792
/**

0 commit comments

Comments
 (0)