Skip to content

Commit c0f2453

Browse files
anematodevondele
authored andcommitted
Make PGO builds deterministic again
some environment dependent execution (e.g. pid) were being std::hash'ed, and net filenames put in unordered maps. Also uses sprintf instead of std::to_string. Depending on precise content, this could lead to different PGO'ed binaries. This is mitigated by using a basic hash function. This also fixes a potential issue in net filename generation, in cases where std::hash would use invocation dependent salt, which is not the cases today for typical std libraries. Closes official-stockfish#6562 No functional change
1 parent f61d431 commit c0f2453

File tree

4 files changed

+48
-34
lines changed

4 files changed

+48
-34
lines changed

src/misc.h

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <memory>
3535
#include <string>
3636
#include <string_view>
37+
#include <type_traits>
3738
#include <vector>
3839

3940
#define stringify2(x) #x
@@ -305,24 +306,38 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
305306
#endif
306307
}
307308

309+
inline std::uint64_t hash_bytes(const char* data, std::size_t size) {
310+
// FNV-1a 64-bit
311+
const char* p = data;
312+
std::uint64_t h = 14695981039346656037ull;
313+
for (std::size_t i = 0; i < size; ++i)
314+
h = (h ^ p[i]) * 1099511628211ull;
315+
return h;
316+
}
308317

309318
template<typename T>
310-
inline void hash_combine(std::size_t& seed, const T& v) {
311-
std::hash<T> hasher;
312-
seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
313-
}
319+
inline std::size_t get_raw_data_hash(const T& value) {
320+
// We must have no padding bytes because we're reinterpreting as char
321+
static_assert(std::has_unique_object_representations<T>());
314322

315-
template<>
316-
inline void hash_combine(std::size_t& seed, const std::size_t& v) {
317-
seed ^= v + 0x9e3779b9 + (seed << 6) + (seed >> 2);
323+
return static_cast<std::size_t>(
324+
hash_bytes(reinterpret_cast<const char*>(&value), sizeof(value)));
318325
}
319326

320327
template<typename T>
321-
inline std::size_t get_raw_data_hash(const T& value) {
322-
return std::hash<std::string_view>{}(
323-
std::string_view(reinterpret_cast<const char*>(&value), sizeof(value)));
328+
inline void hash_combine(std::size_t& seed, const T& v) {
329+
std::size_t x;
330+
// For primitive types we avoid using the default hasher, which may be
331+
// nondeterministic across program invocations
332+
if constexpr (std::is_integral<T>())
333+
x = v;
334+
else
335+
x = std::hash<T>{}(v);
336+
seed ^= x + 0x9e3779b9 + (seed << 6) + (seed >> 2);
324337
}
325338

339+
inline std::uint64_t hash_string(const std::string& sv) { return hash_bytes(sv.data(), sv.size()); }
340+
326341
template<std::size_t Capacity>
327342
class FixedString {
328343
public:
@@ -450,7 +465,7 @@ void move_to_front(std::vector<T>& vec, Predicate pred) {
450465
template<std::size_t N>
451466
struct std::hash<Stockfish::FixedString<N>> {
452467
std::size_t operator()(const Stockfish::FixedString<N>& fstr) const noexcept {
453-
return std::hash<std::string_view>{}((std::string_view) fstr);
468+
return Stockfish::hash_bytes(fstr.data(), fstr.size());
454469
}
455470
};
456471

src/numa.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1586,7 +1586,7 @@ class LazyNumaReplicatedSystemWide: public NumaReplicatedBase {
15861586
CpuIndex cpu = *cfg.nodes[idx].begin(); // get a CpuIndex from NumaIndex
15871587
NumaIndex sys_idx = cfg_sys.is_cpu_assigned(cpu) ? cfg_sys.nodeByCpu.at(cpu) : 0;
15881588
std::string s = cfg_sys.to_string() + "$" + std::to_string(sys_idx);
1589-
return std::hash<std::string>{}(s);
1589+
return static_cast<std::size_t>(hash_string(s));
15901590
}
15911591

15921592
void ensure_present(NumaIndex idx) const {

src/shm.h

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#define SHM_H_INCLUDED
2121

2222
#include <algorithm>
23+
#include <cinttypes>
2324
#include <cstddef>
2425
#include <cstdint>
2526
#include <cstring>
@@ -512,12 +513,9 @@ template<typename T>
512513
struct SystemWideSharedConstant {
513514
private:
514515
static std::string createHashString(const std::string& input) {
515-
size_t hash = std::hash<std::string>{}(input);
516-
517-
std::stringstream ss;
518-
ss << std::hex << std::setfill('0') << hash;
519-
520-
return ss.str();
516+
char buf[1024];
517+
std::snprintf(buf, sizeof(buf), "%016" PRIx64, hash_string(input));
518+
return buf;
521519
}
522520

523521
public:
@@ -534,11 +532,11 @@ struct SystemWideSharedConstant {
534532
// that are not present in the content, for example NUMA node allocation.
535533
SystemWideSharedConstant(const T& value, std::size_t discriminator = 0) {
536534
std::size_t content_hash = std::hash<T>{}(value);
537-
std::size_t executable_hash = std::hash<std::string>{}(getExecutablePathHash());
535+
std::size_t executable_hash = hash_string(getExecutablePathHash());
538536

539-
std::string shm_name = std::string("Local\\sf_") + std::to_string(content_hash) + "$"
540-
+ std::to_string(executable_hash) + "$"
541-
+ std::to_string(discriminator);
537+
char buf[1024];
538+
std::snprintf(buf, sizeof(buf), "Local\\sf_%zu$%zu$%zu", content_hash, executable_hash, discriminator);
539+
std::string shm_name = buf;
542540

543541
#if !defined(_WIN32)
544542
// POSIX shared memory names must start with a slash

src/shm_linux.h

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
#include <string>
3434
#include <inttypes.h>
3535
#include <type_traits>
36-
#include <unordered_set>
3736

3837
#include <fcntl.h>
3938
#include <signal.h>
@@ -51,6 +50,7 @@
5150
#define SF_MAX_SEM_NAME_LEN 255
5251
#endif
5352

53+
#include "misc.h"
5454

5555
namespace Stockfish::shm {
5656

@@ -74,17 +74,18 @@ class SharedMemoryBase {
7474
class SharedMemoryRegistry {
7575
private:
7676
static std::mutex registry_mutex_;
77-
static std::unordered_set<SharedMemoryBase*> active_instances_;
77+
static std::vector<SharedMemoryBase*> active_instances_;
7878

7979
public:
8080
static void register_instance(SharedMemoryBase* instance) {
8181
std::scoped_lock lock(registry_mutex_);
82-
active_instances_.insert(instance);
82+
active_instances_.push_back(instance);
8383
}
8484

8585
static void unregister_instance(SharedMemoryBase* instance) {
8686
std::scoped_lock lock(registry_mutex_);
87-
active_instances_.erase(instance);
87+
active_instances_.erase(
88+
std::remove(active_instances_.begin(), active_instances_.end(), instance), active_instances_.end());
8889
}
8990

9091
static void cleanup_all(bool skip_unmap = false) noexcept {
@@ -96,7 +97,7 @@ class SharedMemoryRegistry {
9697
};
9798

9899
inline std::mutex SharedMemoryRegistry::registry_mutex_;
99-
inline std::unordered_set<SharedMemoryBase*> SharedMemoryRegistry::active_instances_;
100+
inline std::vector<SharedMemoryBase*> SharedMemoryRegistry::active_instances_;
100101

101102
class CleanupHooks {
102103
private:
@@ -181,9 +182,10 @@ class SharedMemory: public detail::SharedMemoryBase {
181182
}
182183

183184
static std::string make_sentinel_base(const std::string& name) {
184-
uint64_t hash = std::hash<std::string>{}(name);
185185
char buf[32];
186-
std::snprintf(buf, sizeof(buf), "sfshm_%016" PRIx64, static_cast<uint64_t>(hash));
186+
// Using std::to_string here causes non-deterministic PGO builds.
187+
// snprintf, being part of libc, is insensitive to the formatted values.
188+
std::snprintf(buf, sizeof(buf), "sfshm_%016" PRIu64, hash_string(name));
187189
return buf;
188190
}
189191

@@ -442,11 +444,10 @@ class SharedMemory: public detail::SharedMemoryBase {
442444
}
443445

444446
std::string sentinel_full_path(pid_t pid) const {
445-
std::string path = "/dev/shm/";
446-
path += sentinel_base_;
447-
path.push_back('.');
448-
path += std::to_string(pid);
449-
return path;
447+
char buf[1024];
448+
// See above snprintf comment
449+
std::snprintf(buf, sizeof(buf), "/dev/shm/%s.%ld", sentinel_base_.c_str(), long(pid));
450+
return buf;
450451
}
451452

452453
void decrement_refcount_relaxed() noexcept {

0 commit comments

Comments
 (0)