[scudo] Refactor initialization of TSDs. #169738
Conversation
Instead of acquiring a lock and then checking/modifying the Initialized variable, make it an atomic. This lets us remove one of the mutexes in shared TSDs and avoids potential lock contention in both shared and exclusive TSDs when multiple threads perform allocation operations at the same time. Add two new tests that verify no crashes occur when multiple threads allocate at the same time.
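For readers unfamiliar with the pattern, here is a minimal standalone sketch of the double-checked, atomic-flag initialization the change moves to. It uses std::atomic and std::mutex instead of scudo's atomic_u8 and HybridMutex, and all names are illustrative rather than scudo code.

```cpp
// Minimal sketch of the pattern this change adopts (illustrative names only):
// initOnceMaybe() checks a relaxed atomic flag without taking the lock, and
// init() re-checks the flag under the lock so threads that lose the race
// return without doing anything.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class RegistrySketch {
public:
  void initOnceMaybe() {
    // Fast path: once initialized, no lock is ever taken here again.
    if (Initialized.load(std::memory_order_relaxed))
      return;
    init();
  }

  int initCount() const { return InitCount; }

private:
  void init() {
    std::lock_guard<std::mutex> L(Mutex);
    // Re-check under the lock; losers of the race do nothing.
    if (Initialized.load(std::memory_order_relaxed))
      return;
    ++InitCount; // Stand-in for the real one-time setup work.
    Initialized.store(1, std::memory_order_relaxed);
  }

  std::atomic<std::uint8_t> Initialized{0};
  std::mutex Mutex;
  int InitCount = 0;
};

int main() {
  RegistrySketch Registry;
  std::vector<std::thread> Threads;
  for (int I = 0; I < 16; ++I)
    Threads.emplace_back([&Registry] { Registry.initOnceMaybe(); });
  for (auto &T : Threads)
    T.join();
  std::printf("init ran %d time(s)\n", Registry.initCount()); // Expected: 1
  return 0;
}
```

The relaxed ordering mirrors the PR; in a general-purpose setting, a release store paired with an acquire load would be the conservative choice for publishing the initialized state.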
@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Christopher Ferris (cferris1000)

Changes: as described above.

Full diff: https://github.com/llvm/llvm-project/pull/169738.diff

3 Files Affected:
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 1d4208b6a2aa0..50e7445c4b4e4 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -18,6 +18,7 @@
#include "size_class_map.h"
#include <algorithm>
+#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
@@ -1396,6 +1397,7 @@ TEST(ScudoCombinedTest, FullUsableSizeMTE) {
VerifyExactUsableSize<AllocatorT>(*Allocator);
VerifyIterateOverUsableSize<AllocatorT>(*Allocator);
}
+
// Verify that no special quarantine blocks appear in iterateOverChunks.
TEST(ScudoCombinedTest, QuarantineIterateOverChunks) {
using AllocatorT = TestAllocator<TestQuarantineConfig>;
@@ -1426,3 +1428,91 @@ TEST(ScudoCombinedTest, QuarantineIterateOverChunks) {
<< std::hex << Base << " Size " << std::dec << Size;
}
}
+
+struct InitSizeClassConfig {
+ static const scudo::uptr NumBits = 1;
+ static const scudo::uptr MinSizeLog = 10;
+ static const scudo::uptr MidSizeLog = 10;
+ static const scudo::uptr MaxSizeLog = 13;
+ static const scudo::u16 MaxNumCachedHint = 8;
+ static const scudo::uptr MaxBytesCachedLog = 12;
+ static const scudo::uptr SizeDelta = 0;
+};
+
+struct TestInitSizeConfig {
+ static const bool MaySupportMemoryTagging = false;
+ static const bool QuarantineDisabled = true;
+
+ struct Primary {
+ // This Primary config has four real size classes: 1024, 2048, 4096, 8192.
+ using SizeClassMap = scudo::FixedSizeClassMap<InitSizeClassConfig>;
+ static const scudo::uptr RegionSizeLog = 21U;
+ static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+ static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+ typedef scudo::uptr CompactPtrT;
+ static const scudo::uptr CompactPtrScale = 0;
+ static const bool EnableRandomOffset = true;
+ static const scudo::uptr MapSizeIncrement = 1UL << 18;
+ static const scudo::uptr GroupSizeLog = 18;
+ };
+ template <typename Config>
+ using PrimaryT = scudo::SizeClassAllocator64<Config>;
+
+ struct Secondary {
+ template <typename Config>
+ using CacheT = scudo::MapAllocatorNoCache<Config>;
+ };
+
+ template <typename Config> using SecondaryT = scudo::MapAllocator<Config>;
+};
+
+struct TestInitSizeTSDSharedConfig : public TestInitSizeConfig {
+ template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 4U, 4U>;
+};
+
+struct TestInitSizeTSDExclusiveConfig : public TestInitSizeConfig {
+ template <class A> using TSDRegistryT = scudo::TSDRegistryExT<A>;
+};
+
+template <class AllocatorT>
+void RunStress() {
+ auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
+
+ // This test is designed to try and have many threads trying to initialize
+ // the TSD at the same time. Make sure this doesn't crash.
+ std::atomic_bool StartRunning = false;
+ std::vector<std::thread*> threads;
+ for (size_t I = 0; I < 16; I++) {
+ threads.emplace_back(new std::thread([&Allocator, &StartRunning] (){
+ while (!StartRunning.load());
+
+ void *Ptr = Allocator->allocate(10, Origin);
+ EXPECT_TRUE(Ptr != nullptr);
+ // Make sure this value is not optimized away.
+ asm volatile("" : : "r,m"(Ptr) : "memory");
+ Allocator->deallocate(Ptr, Origin);
+ }));
+ }
+
+ StartRunning = true;
+
+ for (auto* thread : threads) {
+ thread->join();
+ delete thread;
+ }
+}
+
+TEST(ScudoCombinedTest, StressThreadInitTSDShared) {
+ using AllocatorT = scudo::Allocator<TestInitSizeTSDSharedConfig>;
+ // Run the stress test a few times.
+ for (size_t I = 0; I < 10; I++)
+ RunStress<AllocatorT>();
+}
+
+TEST(ScudoCombinedTest, StressThreadInitTSDExclusive) {
+ using AllocatorT = scudo::Allocator<TestInitSizeTSDExclusiveConfig>;
+ // Run the stress test a few times.
+ for (size_t I = 0; I < 10; I++)
+ RunStress<AllocatorT>();
+}
diff --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
index a58ba6505089f..75921f2be3ffe 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -52,17 +52,20 @@ template <class Allocator> struct TSDRegistryExT {
bool UnlockRequired;
};
- void init(Allocator *Instance) REQUIRES(Mutex) {
- DCHECK(!Initialized);
+ void init(Allocator *Instance) EXCLUDES(Mutex) {
+ ScopedLock L(Mutex);
+ // If more than one thread is initializing at the exact same moment, the
+ // threads that lose don't need to do anything.
+ if (UNLIKELY(atomic_load_relaxed(&Initialized) != 0))
+ return;
Instance->init();
CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0);
FallbackTSD.init(Instance);
- Initialized = true;
+ atomic_store_relaxed(&Initialized, 1);
}
- void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) {
- ScopedLock L(Mutex);
- if (LIKELY(Initialized))
+ void initOnceMaybe(Allocator *Instance) {
+ if (LIKELY(atomic_load_relaxed(&Initialized) != 0))
return;
init(Instance); // Sets Initialized.
}
@@ -81,7 +84,7 @@ template <class Allocator> struct TSDRegistryExT {
FallbackTSD = {};
State = {};
ScopedLock L(Mutex);
- Initialized = false;
+ atomic_store_relaxed(&Initialized, 0);
}
void drainCaches(Allocator *Instance) {
@@ -158,7 +161,7 @@ template <class Allocator> struct TSDRegistryExT {
}
pthread_key_t PThreadKey = {};
- bool Initialized GUARDED_BY(Mutex) = false;
+ atomic_u8 Initialized = {};
atomic_u8 Disabled = {};
TSD<Allocator> FallbackTSD;
HybridMutex Mutex;
diff --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
index 404e984e1f5e9..425a028c955aa 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -47,20 +47,24 @@ struct TSDRegistrySharedT {
TSD<Allocator> *CurrentTSD;
};
- void init(Allocator *Instance) REQUIRES(Mutex) {
- DCHECK(!Initialized);
+ void init(Allocator *Instance) EXCLUDES(Mutex) {
+ ScopedLock L(Mutex);
+ // If more than one thread is initializing at the exact same moment, the
+ // threads that lose don't need to do anything.
+ if (UNLIKELY(atomic_load_relaxed(&Initialized) != 0))
+ return;
+
Instance->init();
for (u32 I = 0; I < TSDsArraySize; I++)
TSDs[I].init(Instance);
const u32 NumberOfCPUs = getNumberOfCPUs();
setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
: Min(NumberOfCPUs, DefaultTSDCount));
- Initialized = true;
+ atomic_store_relaxed(&Initialized, 1);
}
- void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) {
- ScopedLock L(Mutex);
- if (LIKELY(Initialized))
+ void initOnceMaybe(Allocator *Instance) {
+ if (LIKELY(atomic_load_relaxed(&Initialized) != 0))
return;
init(Instance); // Sets Initialized.
}
@@ -72,11 +76,11 @@ struct TSDRegistrySharedT {
}
setCurrentTSD(nullptr);
ScopedLock L(Mutex);
- Initialized = false;
+ atomic_store_relaxed(&Initialized, 0);
}
void drainCaches(Allocator *Instance) {
- ScopedLock L(MutexTSDs);
+ ScopedLock L(Mutex);
for (uptr I = 0; I < NumberOfTSDs; ++I) {
TSDs[I].lock();
Instance->drainCache(&TSDs[I]);
@@ -93,7 +97,6 @@ struct TSDRegistrySharedT {
void disable() NO_THREAD_SAFETY_ANALYSIS {
Mutex.lock();
- MutexTSDs.lock();
for (u32 I = 0; I < TSDsArraySize; I++)
TSDs[I].lock();
}
@@ -101,13 +104,14 @@ struct TSDRegistrySharedT {
void enable() NO_THREAD_SAFETY_ANALYSIS {
for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--)
TSDs[I].unlock();
- MutexTSDs.unlock();
Mutex.unlock();
}
bool setOption(Option O, sptr Value) {
- if (O == Option::MaxTSDsCount)
+ if (O == Option::MaxTSDsCount) {
+ ScopedLock L(Mutex);
return setNumberOfTSDs(static_cast<u32>(Value));
+ }
if (O == Option::ThreadDisableMemInit)
setDisableMemInit(Value);
// Not supported by the TSD Registry, but not an error either.
@@ -116,8 +120,8 @@ struct TSDRegistrySharedT {
bool getDisableMemInit() const { return *getTlsPtr() & 1; }
- void getStats(ScopedString *Str) EXCLUDES(MutexTSDs) {
- ScopedLock L(MutexTSDs);
+ void getStats(ScopedString *Str) EXCLUDES(Mutex) {
+ ScopedLock L(Mutex);
Str->append("Stats: SharedTSDs: %u available; total %u\n", NumberOfTSDs,
TSDsArraySize);
@@ -171,8 +175,7 @@ struct TSDRegistrySharedT {
return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL);
}
- bool setNumberOfTSDs(u32 N) EXCLUDES(MutexTSDs) {
- ScopedLock L(MutexTSDs);
+ bool setNumberOfTSDs(u32 N) REQUIRES(Mutex) {
if (N < NumberOfTSDs)
return false;
if (N > TSDsArraySize)
@@ -213,14 +216,14 @@ struct TSDRegistrySharedT {
// TSDs is an array of locks which is not supported for marking thread-safety
// capability.
NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD)
- EXCLUDES(MutexTSDs) {
+ EXCLUDES(Mutex) {
// Use the Precedence of the current TSD as our random seed. Since we are
// in the slow path, it means that tryLock failed, and as a result it's
// very likely that said Precedence is non-zero.
const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
u32 N, Inc;
{
- ScopedLock L(MutexTSDs);
+ ScopedLock L(Mutex);
N = NumberOfTSDs;
DCHECK_NE(NumberOfCoPrimes, 0U);
Inc = CoPrimes[R % NumberOfCoPrimes];
@@ -257,12 +260,15 @@ struct TSDRegistrySharedT {
}
atomic_u32 CurrentIndex = {};
- u32 NumberOfTSDs GUARDED_BY(MutexTSDs) = 0;
- u32 NumberOfCoPrimes GUARDED_BY(MutexTSDs) = 0;
- u32 CoPrimes[TSDsArraySize] GUARDED_BY(MutexTSDs) = {};
- bool Initialized GUARDED_BY(Mutex) = false;
+ u32 NumberOfTSDs GUARDED_BY(Mutex) = 0;
+ u32 NumberOfCoPrimes GUARDED_BY(Mutex) = 0;
+ u32 CoPrimes[TSDsArraySize] GUARDED_BY(Mutex) = {};
+ atomic_u8 Initialized = {};
+ // Used for global initialization and TSDs access.
+ // Acquiring the global initialization should only lock once in normal
+ // operation, which is why using it for TSDs access should not cause
+ // any interference.
HybridMutex Mutex;
- HybridMutex MutexTSDs;
TSD<Allocator> TSDs[TSDsArraySize];
};
|
✅ With the latest revision this PR passed the C/C++ code formatter.
This moves to a single mutex for shared TSDs. I think this should be fine because I also made it so that the global initialization no longer locks the mutex.
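A rough sketch of the rationale, with assumed names only (this is not scudo code): once the atomic flag is set, initOnceMaybe never touches the mutex again, so per-operation paths that briefly lock the same mutex for TSD metadata only contend with each other, not with initialization.

```cpp
// Illustrative sketch of why one mutex can guard both one-time initialization
// and TSD metadata: after the first init, the atomic fast path bypasses the
// mutex entirely.
#include <atomic>
#include <cstdint>
#include <mutex>

class SharedRegistrySketch {
public:
  void initOnceMaybe() {
    if (Initialized.load(std::memory_order_relaxed))
      return; // Fast path: the mutex is not taken after the first init.
    std::lock_guard<std::mutex> L(Mutex);
    if (Initialized.load(std::memory_order_relaxed))
      return;
    NumberOfTSDs = 4; // Stand-in for the real setup (TSD init, CPU count).
    Initialized.store(1, std::memory_order_relaxed);
  }

  // Stand-in for accessors such as getStats() that read TSD metadata under
  // the same mutex.
  unsigned numberOfTSDs() {
    std::lock_guard<std::mutex> L(Mutex);
    return NumberOfTSDs;
  }

private:
  std::atomic<std::uint8_t> Initialized{0};
  std::mutex Mutex;
  unsigned NumberOfTSDs = 0;
};
```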
@sadafebrahami Here is one of my other pull requests.