Commit 64ee4bf
[scudo] Refactor initialization of TSDs. (#169738)
Instead of acquiring a lock and then checking/modifying the Initialized variable, make it an atomic. This lets us remove one of the mutexes in shared TSDs and avoids potential lock contention in both shared and exclusive TSDs when multiple threads perform allocation operations at the same time. Add two new tests to make sure no crashes occur when multiple threads allocate concurrently.
1 parent 926cbdd commit 64ee4bf
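The change amounts to a double-checked once-init: a lock-free flag test on the fast path, with the flag re-checked under the mutex on the slow path. A minimal sketch of the pattern, using std::atomic and std::mutex in place of scudo's atomic_u8 and HybridMutex (the Registry name is illustrative, not from the commit):

#include <atomic>
#include <cstdint>
#include <mutex>

struct Registry {
  std::atomic<uint8_t> Initialized{0};
  std::mutex Mutex;

  void initOnceMaybe() {
    // Fast path: once initialized, no thread ever touches the mutex again.
    if (Initialized.load(std::memory_order_relaxed) != 0)
      return;
    init();
  }

  void init() {
    std::lock_guard<std::mutex> L(Mutex);
    // Re-check under the lock: threads that lose the race do nothing.
    if (Initialized.load(std::memory_order_relaxed) != 0)
      return;
    // ... one-time initialization would go here ...
    Initialized.store(1, std::memory_order_relaxed);
  }
};

The relaxed ordering mirrors the commit's atomic_load_relaxed/atomic_store_relaxed; the initialization work itself is still serialized by the mutex.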

File tree

3 files changed (+127 −30 lines):
compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
compiler-rt/lib/scudo/standalone/tsd_exclusive.h
compiler-rt/lib/scudo/standalone/tsd_shared.h

compiler-rt/lib/scudo/standalone/tests/combined_test.cpp

Lines changed: 88 additions & 0 deletions
@@ -18,6 +18,7 @@
 #include "size_class_map.h"
 
 #include <algorithm>
+#include <atomic>
 #include <condition_variable>
 #include <memory>
 #include <mutex>
@@ -1396,6 +1397,7 @@ TEST(ScudoCombinedTest, FullUsableSizeMTE) {
   VerifyExactUsableSize<AllocatorT>(*Allocator);
   VerifyIterateOverUsableSize<AllocatorT>(*Allocator);
 }
+
 // Verify that no special quarantine blocks appear in iterateOverChunks.
 TEST(ScudoCombinedTest, QuarantineIterateOverChunks) {
   using AllocatorT = TestAllocator<TestQuarantineConfig>;
@@ -1426,3 +1428,89 @@ TEST(ScudoCombinedTest, QuarantineIterateOverChunks) {
         << std::hex << Base << " Size " << std::dec << Size;
   }
 }
+
+struct InitSizeClassConfig {
+  static const scudo::uptr NumBits = 1;
+  static const scudo::uptr MinSizeLog = 10;
+  static const scudo::uptr MidSizeLog = 10;
+  static const scudo::uptr MaxSizeLog = 13;
+  static const scudo::u16 MaxNumCachedHint = 8;
+  static const scudo::uptr MaxBytesCachedLog = 12;
+  static const scudo::uptr SizeDelta = 0;
+};
+
+struct TestInitSizeConfig {
+  static const bool MaySupportMemoryTagging = false;
+  static const bool QuarantineDisabled = true;
+
+  struct Primary {
+    using SizeClassMap = scudo::FixedSizeClassMap<InitSizeClassConfig>;
+    static const scudo::uptr RegionSizeLog = 21U;
+    static const scudo::s32 MinReleaseToOsIntervalMs = INT32_MIN;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = INT32_MAX;
+    typedef scudo::uptr CompactPtrT;
+    static const scudo::uptr CompactPtrScale = 0;
+    static const bool EnableRandomOffset = true;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+    static const scudo::uptr GroupSizeLog = 18;
+  };
+  template <typename Config>
+  using PrimaryT = scudo::SizeClassAllocator64<Config>;
+
+  struct Secondary {
+    template <typename Config>
+    using CacheT = scudo::MapAllocatorNoCache<Config>;
+  };
+
+  template <typename Config> using SecondaryT = scudo::MapAllocator<Config>;
+};
+
+struct TestInitSizeTSDSharedConfig : public TestInitSizeConfig {
+  template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 4U, 4U>;
+};
+
+struct TestInitSizeTSDExclusiveConfig : public TestInitSizeConfig {
+  template <class A> using TSDRegistryT = scudo::TSDRegistryExT<A>;
+};
+
+template <class AllocatorT> void RunStress() {
+  auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
+
+  // This test is designed to have many threads trying to initialize the TSD
+  // at the same time. Make sure this doesn't crash.
+  std::atomic_bool StartRunning = false;
+  std::vector<std::thread *> threads;
+  for (size_t I = 0; I < 16; I++) {
+    threads.emplace_back(new std::thread([&Allocator, &StartRunning]() {
+      while (!StartRunning.load())
+        ;
+
+      void *Ptr = Allocator->allocate(10, Origin);
+      EXPECT_TRUE(Ptr != nullptr);
+      // Make sure this value is not optimized away.
+      asm volatile("" : : "r,m"(Ptr) : "memory");
+      Allocator->deallocate(Ptr, Origin);
+    }));
+  }
+
+  StartRunning = true;
+
+  for (auto *thread : threads) {
+    thread->join();
+    delete thread;
+  }
+}
+
+TEST(ScudoCombinedTest, StressThreadInitTSDShared) {
+  using AllocatorT = scudo::Allocator<TestInitSizeTSDSharedConfig>;
+  // Run the stress test a few times.
+  for (size_t I = 0; I < 10; I++)
+    RunStress<AllocatorT>();
+}
+
+TEST(ScudoCombinedTest, StressThreadInitTSDExclusive) {
+  using AllocatorT = scudo::Allocator<TestInitSizeTSDExclusiveConfig>;
+  // Run the stress test a few times.
+  for (size_t I = 0; I < 10; I++)
+    RunStress<AllocatorT>();
}
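A note on the test's design: the spin-wait on StartRunning acts as a start gate, parking all 16 threads until a single store releases them, so their first allocations (and therefore TSD initialization) collide as closely as possible. The same gate in isolation, as a sketch with illustrative names (Gate, Workers) not taken from the test:

#include <atomic>
#include <cstdlib>
#include <thread>
#include <vector>

int main() {
  std::atomic<bool> Gate{false};
  std::vector<std::thread> Workers;
  for (int I = 0; I < 16; ++I)
    Workers.emplace_back([&Gate] {
      while (!Gate.load()) // busy-wait so every thread starts together
        ;
      void *Ptr = std::malloc(10); // stand-in for Allocator->allocate()
      std::free(Ptr);
    });
  Gate.store(true); // one store releases all workers at once
  for (auto &W : Workers)
    W.join();
  return 0;
}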

compiler-rt/lib/scudo/standalone/tsd_exclusive.h

Lines changed: 11 additions & 8 deletions
@@ -52,17 +52,20 @@ template <class Allocator> struct TSDRegistryExT {
     bool UnlockRequired;
   };
 
-  void init(Allocator *Instance) REQUIRES(Mutex) {
-    DCHECK(!Initialized);
+  void init(Allocator *Instance) EXCLUDES(Mutex) {
+    ScopedLock L(Mutex);
+    // If more than one thread is initializing at the exact same moment, the
+    // threads that lose don't need to do anything.
+    if (UNLIKELY(atomic_load_relaxed(&Initialized) != 0))
+      return;
     Instance->init();
     CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0);
     FallbackTSD.init(Instance);
-    Initialized = true;
+    atomic_store_relaxed(&Initialized, 1);
   }
 
-  void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) {
-    ScopedLock L(Mutex);
-    if (LIKELY(Initialized))
+  void initOnceMaybe(Allocator *Instance) {
+    if (LIKELY(atomic_load_relaxed(&Initialized) != 0))
       return;
     init(Instance); // Sets Initialized.
   }
@@ -81,7 +84,7 @@ template <class Allocator> struct TSDRegistryExT {
     FallbackTSD = {};
     State = {};
     ScopedLock L(Mutex);
-    Initialized = false;
+    atomic_store_relaxed(&Initialized, 0);
   }
 
   void drainCaches(Allocator *Instance) {
@@ -158,7 +161,7 @@ template <class Allocator> struct TSDRegistryExT {
   }
 
   pthread_key_t PThreadKey = {};
-  bool Initialized GUARDED_BY(Mutex) = false;
+  atomic_u8 Initialized = {};
   atomic_u8 Disabled = {};
   TSD<Allocator> FallbackTSD;
   HybridMutex Mutex;
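Much of this diff is re-annotation: init moves from REQUIRES(Mutex) (the caller already holds the lock) to EXCLUDES(Mutex) (the caller must not hold it; the function acquires it itself). A minimal sketch of how such macros drive Clang's -Wthread-safety analysis; the attribute spellings follow the Clang documentation rather than scudo's internal headers, and ToyMutex/ToyRegistry are illustrative:

#define CAPABILITY(x) __attribute__((capability(x)))
#define GUARDED_BY(x) __attribute__((guarded_by(x)))
#define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))
#define EXCLUDES(...) __attribute__((locks_excluded(__VA_ARGS__)))
#define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))

class CAPABILITY("mutex") ToyMutex {
public:
  void lock() ACQUIRE() {}
  void unlock() RELEASE() {}
};

struct ToyRegistry {
  ToyMutex Mutex;
  int State GUARDED_BY(Mutex) = 0;

  // REQUIRES: callers must already hold Mutex (like setNumberOfTSDs above).
  void mutate() REQUIRES(Mutex) { State = 1; }

  // EXCLUDES: callers must not hold Mutex; it is acquired here (like init).
  void initSelf() EXCLUDES(Mutex) {
    Mutex.lock();
    mutate(); // OK: the analysis sees Mutex as held
    Mutex.unlock();
  }
};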

compiler-rt/lib/scudo/standalone/tsd_shared.h

Lines changed: 28 additions & 22 deletions
@@ -47,20 +47,24 @@ struct TSDRegistrySharedT {
     TSD<Allocator> *CurrentTSD;
   };
 
-  void init(Allocator *Instance) REQUIRES(Mutex) {
-    DCHECK(!Initialized);
+  void init(Allocator *Instance) EXCLUDES(Mutex) {
+    ScopedLock L(Mutex);
+    // If more than one thread is initializing at the exact same moment, the
+    // threads that lose don't need to do anything.
+    if (UNLIKELY(atomic_load_relaxed(&Initialized) != 0))
+      return;
+
     Instance->init();
     for (u32 I = 0; I < TSDsArraySize; I++)
       TSDs[I].init(Instance);
     const u32 NumberOfCPUs = getNumberOfCPUs();
     setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
                                         : Min(NumberOfCPUs, DefaultTSDCount));
-    Initialized = true;
+    atomic_store_relaxed(&Initialized, 1);
   }
 
-  void initOnceMaybe(Allocator *Instance) EXCLUDES(Mutex) {
-    ScopedLock L(Mutex);
-    if (LIKELY(Initialized))
+  void initOnceMaybe(Allocator *Instance) {
+    if (LIKELY(atomic_load_relaxed(&Initialized) != 0))
       return;
     init(Instance); // Sets Initialized.
   }
@@ -72,11 +76,11 @@ struct TSDRegistrySharedT {
     }
     setCurrentTSD(nullptr);
     ScopedLock L(Mutex);
-    Initialized = false;
+    atomic_store_relaxed(&Initialized, 0);
   }
 
   void drainCaches(Allocator *Instance) {
-    ScopedLock L(MutexTSDs);
+    ScopedLock L(Mutex);
     for (uptr I = 0; I < NumberOfTSDs; ++I) {
       TSDs[I].lock();
       Instance->drainCache(&TSDs[I]);
@@ -93,21 +97,21 @@ struct TSDRegistrySharedT {
 
   void disable() NO_THREAD_SAFETY_ANALYSIS {
     Mutex.lock();
-    MutexTSDs.lock();
     for (u32 I = 0; I < TSDsArraySize; I++)
      TSDs[I].lock();
   }
 
   void enable() NO_THREAD_SAFETY_ANALYSIS {
     for (s32 I = static_cast<s32>(TSDsArraySize - 1); I >= 0; I--)
       TSDs[I].unlock();
-    MutexTSDs.unlock();
     Mutex.unlock();
   }
 
   bool setOption(Option O, sptr Value) {
-    if (O == Option::MaxTSDsCount)
+    if (O == Option::MaxTSDsCount) {
+      ScopedLock L(Mutex);
       return setNumberOfTSDs(static_cast<u32>(Value));
+    }
     if (O == Option::ThreadDisableMemInit)
       setDisableMemInit(Value);
     // Not supported by the TSD Registry, but not an error either.
@@ -116,8 +120,8 @@ struct TSDRegistrySharedT {
 
   bool getDisableMemInit() const { return *getTlsPtr() & 1; }
 
-  void getStats(ScopedString *Str) EXCLUDES(MutexTSDs) {
-    ScopedLock L(MutexTSDs);
+  void getStats(ScopedString *Str) EXCLUDES(Mutex) {
+    ScopedLock L(Mutex);
 
     Str->append("Stats: SharedTSDs: %u available; total %u\n", NumberOfTSDs,
                 TSDsArraySize);
@@ -171,8 +175,7 @@ struct TSDRegistrySharedT {
     return reinterpret_cast<TSD<Allocator> *>(*getTlsPtr() & ~1ULL);
   }
 
-  bool setNumberOfTSDs(u32 N) EXCLUDES(MutexTSDs) {
-    ScopedLock L(MutexTSDs);
+  bool setNumberOfTSDs(u32 N) REQUIRES(Mutex) {
     if (N < NumberOfTSDs)
       return false;
     if (N > TSDsArraySize)
@@ -213,14 +216,14 @@ struct TSDRegistrySharedT {
   // TSDs is an array of locks which is not supported for marking thread-safety
   // capability.
   NOINLINE TSD<Allocator> *getTSDAndLockSlow(TSD<Allocator> *CurrentTSD)
-      EXCLUDES(MutexTSDs) {
+      EXCLUDES(Mutex) {
     // Use the Precedence of the current TSD as our random seed. Since we are
     // in the slow path, it means that tryLock failed, and as a result it's
     // very likely that said Precedence is non-zero.
     const u32 R = static_cast<u32>(CurrentTSD->getPrecedence());
     u32 N, Inc;
     {
-      ScopedLock L(MutexTSDs);
+      ScopedLock L(Mutex);
       N = NumberOfTSDs;
       DCHECK_NE(NumberOfCoPrimes, 0U);
       Inc = CoPrimes[R % NumberOfCoPrimes];
@@ -257,12 +260,15 @@ struct TSDRegistrySharedT {
   }
 
   atomic_u32 CurrentIndex = {};
-  u32 NumberOfTSDs GUARDED_BY(MutexTSDs) = 0;
-  u32 NumberOfCoPrimes GUARDED_BY(MutexTSDs) = 0;
-  u32 CoPrimes[TSDsArraySize] GUARDED_BY(MutexTSDs) = {};
-  bool Initialized GUARDED_BY(Mutex) = false;
+  u32 NumberOfTSDs GUARDED_BY(Mutex) = 0;
+  u32 NumberOfCoPrimes GUARDED_BY(Mutex) = 0;
+  u32 CoPrimes[TSDsArraySize] GUARDED_BY(Mutex) = {};
+  atomic_u8 Initialized = {};
+  // Used for global initialization and TSDs access.
+  // Acquiring the global initialization should only lock once in normal
+  // operation, which is why using it for TSDs access should not cause
+  // any interference.
   HybridMutex Mutex;
-  HybridMutex MutexTSDs;
   TSD<Allocator> TSDs[TSDsArraySize];
 };
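One detail the merged Mutex now protects: the CoPrimes table read in getTSDAndLockSlow. The probe loop works because Inc is co-prime with the TSD count N, so stepping an index by Inc modulo N visits every slot exactly once before repeating. A standalone check of that property, as a sketch (C++17 for std::gcd; the constants are arbitrary):

#include <cassert>
#include <numeric>
#include <set>

int main() {
  const unsigned N = 4;   // stand-in for NumberOfTSDs
  const unsigned Inc = 3; // stand-in for CoPrimes[R % NumberOfCoPrimes]
  assert(std::gcd(Inc, N) == 1); // the CoPrimes table guarantees this
  std::set<unsigned> Visited;
  unsigned Index = 0;
  for (unsigned I = 0; I < N; ++I) {
    Visited.insert(Index);
    Index = (Index + Inc) % N;
  }
  assert(Visited.size() == N); // every TSD slot probed exactly once
  return 0;
}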
