Skip to content

Commit 29e82e4

Browse files
committed
Merge #15250: Use RdSeed when available, and reduce RdRand load
1435fab Use RdSeed when available, and reduce RdRand load (Pieter Wuille)

Pull request description:

This introduces support for autodetecting and using the RdSeed instruction on x86/x86_64 systems.

In addition:
* In SeedFast, only 64 bits of entropy are generated through RdRand (256 was relatively slow).
* In SeedStartup, 256 bits of entropy are generated, using RdSeed (preferably) or RdRand (otherwise).

Tree-SHA512: fb7d3e22e93e14592f4b07282aa79d7c3cc4e9debdd9978580b8d2562bbad345e289bf3f80de2c50c9b50b8bac2aa9b838f9f272f7f8d43f1efc0913aa8acce3
2 parents f5a623e + 1435fab commit 29e82e4

File tree

2 files changed

+136
-39
lines changed

2 files changed

+136
-39
lines changed

src/random.cpp

Lines changed: 134 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -78,25 +78,119 @@ static inline int64_t GetPerformanceCounter() noexcept
7878
}
7979

8080
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
81-
static bool rdrand_supported = false;
81+
static bool g_rdrand_supported = false;
82+
static bool g_rdseed_supported = false;
8283
static constexpr uint32_t CPUID_F1_ECX_RDRAND = 0x40000000;
84+
static constexpr uint32_t CPUID_F7_EBX_RDSEED = 0x00040000;
85+
#ifdef bit_RDRND
86+
static_assert(CPUID_F1_ECX_RDRAND == bit_RDRND, "Unexpected value for bit_RDRND");
87+
#endif
88+
#ifdef bit_RDSEED
89+
static_assert(CPUID_F7_EBX_RDSEED == bit_RDSEED, "Unexpected value for bit_RDSEED");
90+
#endif
91+
static void inline GetCPUID(uint32_t leaf, uint32_t subleaf, uint32_t& a, uint32_t& b, uint32_t& c, uint32_t& d)
92+
{
93+
// We can't use __get_cpuid as it doesn't support subleafs.
94+
#ifdef __GNUC__
95+
__cpuid_count(leaf, subleaf, a, b, c, d);
96+
#else
97+
__asm__ ("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(leaf), "2"(subleaf));
98+
#endif
99+
}
100+
83101
static void InitHardwareRand()
84102
{
85103
uint32_t eax, ebx, ecx, edx;
86-
if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) && (ecx & CPUID_F1_ECX_RDRAND)) {
87-
rdrand_supported = true;
104+
GetCPUID(1, 0, eax, ebx, ecx, edx);
105+
if (ecx & CPUID_F1_ECX_RDRAND) {
106+
g_rdrand_supported = true;
107+
}
108+
GetCPUID(7, 0, eax, ebx, ecx, edx);
109+
if (ebx & CPUID_F7_EBX_RDSEED) {
110+
g_rdseed_supported = true;
88111
}
89112
}
90113

91114
static void ReportHardwareRand()
92115
{
93-
if (rdrand_supported) {
94-
// This must be done in a separate function, as HWRandInit() may be indirectly called
95-
// from global constructors, before logging is initialized.
116+
// This must be done in a separate function, as InitHardwareRand() may be indirectly called
117+
// from global constructors, before logging is initialized.
118+
if (g_rdseed_supported) {
119+
LogPrintf("Using RdSeed as additional entropy source\n");
120+
}
121+
if (g_rdrand_supported) {
96122
LogPrintf("Using RdRand as an additional entropy source\n");
97123
}
98124
}
99125

126+
/** Read 64 bits of entropy using rdrand.
127+
*
128+
* Must only be called when RdRand is supported.
129+
*/
130+
static uint64_t GetRdRand() noexcept
131+
{
132+
// RdRand may very rarely fail. Invoke it up to 10 times in a loop to reduce this risk.
133+
#ifdef __i386__
134+
uint8_t ok;
135+
uint32_t r1, r2;
136+
for (int i = 0; i < 10; ++i) {
137+
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdrand %eax
138+
if (ok) break;
139+
}
140+
for (int i = 0; i < 10; ++i) {
141+
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r2), "=q"(ok) :: "cc"); // rdrand %eax
142+
if (ok) break;
143+
}
144+
return (((uint64_t)r2) << 32) | r1;
145+
#elif defined(__x86_64__) || defined(__amd64__)
146+
uint8_t ok;
147+
uint64_t r1;
148+
for (int i = 0; i < 10; ++i) {
149+
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf0; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdrand %rax
150+
if (ok) break;
151+
}
152+
return r1;
153+
#else
154+
#error "RdRand is only supported on x86 and x86_64"
155+
#endif
156+
}
157+
158+
/** Read 64 bits of entropy using rdseed.
159+
*
160+
* Must only be called when RdSeed is supported.
161+
*/
162+
static uint64_t GetRdSeed() noexcept
163+
{
164+
// RdSeed may fail when the HW RNG is overloaded. Loop indefinitely until enough entropy is gathered,
165+
// but pause after every failure.
166+
#ifdef __i386__
167+
uint8_t ok;
168+
uint32_t r1, r2;
169+
do {
170+
__asm__ volatile (".byte 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdseed %eax
171+
if (ok) break;
172+
__asm__ volatile ("pause");
173+
} while(true);
174+
do {
175+
__asm__ volatile (".byte 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r2), "=q"(ok) :: "cc"); // rdseed %eax
176+
if (ok) break;
177+
__asm__ volatile ("pause");
178+
} while(true);
179+
return (((uint64_t)r2) << 32) | r1;
180+
#elif defined(__x86_64__) || defined(__amd64__)
181+
uint8_t ok;
182+
uint64_t r1;
183+
do {
184+
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf8; setc %1" : "=a"(r1), "=q"(ok) :: "cc"); // rdseed %rax
185+
if (ok) break;
186+
__asm__ volatile ("pause");
187+
} while(true);
188+
return r1;
189+
#else
190+
#error "RdSeed is only supported on x86 and x86_64"
191+
#endif
192+
}
193+
100194
#else
101195
/* Access to other hardware random number generators could be added here later,
102196
* assuming it is sufficiently fast (in the order of a few hundred CPU cycles).
@@ -107,40 +201,40 @@ static void InitHardwareRand() {}
107201
static void ReportHardwareRand() {}
108202
#endif
109203

110-
static bool GetHardwareRand(unsigned char* ent32) noexcept {
204+
/** Add 64 bits of entropy gathered from hardware to hasher. Do nothing if not supported. */
205+
static void SeedHardwareFast(CSHA512& hasher) noexcept {
111206
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
112-
if (rdrand_supported) {
113-
uint8_t ok;
114-
// Not all assemblers support the rdrand instruction, write it in hex.
115-
#ifdef __i386__
116-
for (int iter = 0; iter < 4; ++iter) {
117-
uint32_t r1, r2;
118-
__asm__ volatile (".byte 0x0f, 0xc7, 0xf0;" // rdrand %eax
119-
".byte 0x0f, 0xc7, 0xf2;" // rdrand %edx
120-
"setc %2" :
121-
"=a"(r1), "=d"(r2), "=q"(ok) :: "cc");
122-
if (!ok) return false;
123-
WriteLE32(ent32 + 8 * iter, r1);
124-
WriteLE32(ent32 + 8 * iter + 4, r2);
125-
}
126-
#else
127-
uint64_t r1, r2, r3, r4;
128-
__asm__ volatile (".byte 0x48, 0x0f, 0xc7, 0xf0, " // rdrand %rax
129-
"0x48, 0x0f, 0xc7, 0xf3, " // rdrand %rbx
130-
"0x48, 0x0f, 0xc7, 0xf1, " // rdrand %rcx
131-
"0x48, 0x0f, 0xc7, 0xf2; " // rdrand %rdx
132-
"setc %4" :
133-
"=a"(r1), "=b"(r2), "=c"(r3), "=d"(r4), "=q"(ok) :: "cc");
134-
if (!ok) return false;
135-
WriteLE64(ent32, r1);
136-
WriteLE64(ent32 + 8, r2);
137-
WriteLE64(ent32 + 16, r3);
138-
WriteLE64(ent32 + 24, r4);
207+
if (g_rdrand_supported) {
208+
uint64_t out = GetRdRand();
209+
hasher.Write((const unsigned char*)&out, sizeof(out));
210+
return;
211+
}
139212
#endif
140-
return true;
213+
}
214+
215+
/** Add 256 bits of entropy gathered from hardware to hasher. Do nothing if not supported. */
216+
static void SeedHardwareSlow(CSHA512& hasher) noexcept {
217+
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
218+
// When we want 256 bits of entropy, prefer RdSeed over RdRand, as it's
219+
// guaranteed to produce independent randomness on every call.
220+
if (g_rdseed_supported) {
221+
for (int i = 0; i < 4; ++i) {
222+
uint64_t out = GetRdSeed();
223+
hasher.Write((const unsigned char*)&out, sizeof(out));
224+
}
225+
return;
226+
}
227+
// When falling back to RdRand, XOR together 1024 results for each 64-bit output.
228+
// This guarantees that a reseeding occurs between consecutive outputs.
229+
if (g_rdrand_supported) {
230+
for (int i = 0; i < 4; ++i) {
231+
uint64_t out = 0;
232+
for (int j = 0; j < 1024; ++j) out ^= GetRdRand();
233+
hasher.Write((const unsigned char*)&out, sizeof(out));
234+
}
235+
return;
141236
}
142237
#endif
143-
return false;
144238
}
145239

146240
static void RandAddSeedPerfmon(CSHA512& hasher)
@@ -407,8 +501,7 @@ static void SeedFast(CSHA512& hasher) noexcept
407501
hasher.Write((const unsigned char*)&ptr, sizeof(ptr));
408502

409503
// Hardware randomness is very fast when available; use it always.
410-
bool have_hw_rand = GetHardwareRand(buffer);
411-
if (have_hw_rand) hasher.Write(buffer, sizeof(buffer));
504+
SeedHardwareFast(hasher);
412505

413506
// High-precision timestamp
414507
SeedTimestamp(hasher);
@@ -460,6 +553,9 @@ static void SeedStartup(CSHA512& hasher) noexcept
460553
RAND_screen();
461554
#endif
462555

556+
// Gather 256 bits of hardware randomness, if available
557+
SeedHardwareSlow(hasher);
558+
463559
// Everything that the 'slow' seeder includes.
464560
SeedSlow(hasher);
465561

src/random.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
* perform 'fast' seeding, consisting of mixing in:
2525
* - A stack pointer (indirectly committing to calling thread and call stack)
2626
* - A high-precision timestamp (rdtsc when available, c++ high_resolution_clock otherwise)
27-
* - Hardware RNG (rdrand) when available.
27+
* - 64 bits from the hardware RNG (rdrand) when available.
2828
* These entropy sources are very fast, and only designed to protect against situations
2929
* where a VM state restore/copy results in multiple systems with the same randomness.
3030
* FastRandomContext on the other hand does not protect against this once created, but
@@ -48,6 +48,7 @@
4848
*
4949
* On first use of the RNG (regardless of what function is called first), all entropy
5050
* sources used in the 'slow' seeder are included, but also:
51+
* - 256 bits from the hardware RNG (rdseed or rdrand) when available.
5152
* - (On Windows) Performance monitoring data from the OS.
5253
* - (On Windows) Through OpenSSL, the screen contents.
5354
*

0 commit comments

Comments
 (0)