Skip to content

Commit 95322f7

Browse files
ckennelly authored and copybara-github committed
Cache NumCPUs() with slab, allowing us to validate CPUs are within bounds.
This is currently limited to debug builds.

PiperOrigin-RevId: 816951509
Change-Id: I74f943130e75e68327efcc8fa448a9caa1714abc
1 parent 5479c11 commit 95322f7

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

tcmalloc/internal/percpu_tcmalloc.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,7 @@ class TcmallocSlab {
380380
std::atomic<bool>* stopped_ = nullptr;
381381
// begins_[size_class] is offset of the size_class region in the slabs area.
382382
std::atomic<uint16_t>* begins_ = nullptr;
383+
int num_cpus_ = 0;
383384
};
384385

385386
// RAII for StopCpu/StartCpu.
@@ -1040,10 +1041,11 @@ template <size_t NumClasses>
10401041
void TcmallocSlab<NumClasses>::Init(
10411042
absl::FunctionRef<void*(size_t, std::align_val_t)> alloc, void* slabs,
10421043
absl::FunctionRef<size_t(size_t)> capacity, Shift shift) {
1043-
stopped_ = new (alloc(sizeof(stopped_[0]) * NumCPUs(),
1044+
num_cpus_ = NumCPUs();
1045+
stopped_ = new (alloc(sizeof(stopped_[0]) * num_cpus_,
10441046
std::align_val_t{ABSL_CACHELINE_SIZE}))
10451047
std::atomic<bool>[NumCPUs()];
1046-
for (int cpu = NumCPUs() - 1; cpu >= 0; cpu--) {
1048+
for (int cpu = num_cpus_ - 1; cpu >= 0; cpu--) {
10471049
stopped_[cpu].store(false, std::memory_order_relaxed);
10481050
}
10491051
begins_ = static_cast<std::atomic<uint16_t>*>(alloc(
@@ -1144,6 +1146,8 @@ std::pair<int, bool> TcmallocSlab<NumClasses>::CacheCpuSlabSlow() {
11441146
tcmalloc_slabs = TCMALLOC_CACHED_SLABS_MASK;
11451147
CompilerBarrier();
11461148
vcpu = VirtualCpu::Synchronize();
1149+
TC_ASSERT_GE(vcpu, 0);
1150+
TC_ASSERT_LT(vcpu, num_cpus_);
11471151
auto slabs_and_shift = slabs_and_shift_.load(std::memory_order_relaxed);
11481152
const auto [slabs, shift] = slabs_and_shift.Get();
11491153
void* start = CpuMemoryStart(slabs, shift, vcpu);
@@ -1248,7 +1252,7 @@ ResizeSlabsInfo TcmallocSlab<NumClasses>::UpdateMaxCapacities(
12481252
begins_[size_class].load(std::memory_order_relaxed);
12491253
}
12501254

1251-
const int num_cpus = NumCPUs();
1255+
const int num_cpus = num_cpus_;
12521256
for (size_t cpu = 0; cpu < num_cpus; ++cpu) {
12531257
TC_CHECK(!stopped_[cpu].load(std::memory_order_relaxed));
12541258
stopped_[cpu].store(true, std::memory_order_relaxed);
@@ -1299,7 +1303,7 @@ auto TcmallocSlab<NumClasses>::ResizeSlabs(
12991303
}
13001304

13011305
TC_ASSERT_NE(new_shift, old_shift);
1302-
const int num_cpus = NumCPUs();
1306+
const int num_cpus = num_cpus_;
13031307
for (size_t cpu = 0; cpu < num_cpus; ++cpu) {
13041308
TC_CHECK(!stopped_[cpu].load(std::memory_order_relaxed));
13051309
stopped_[cpu].store(true, std::memory_order_relaxed);
@@ -1329,14 +1333,14 @@ auto TcmallocSlab<NumClasses>::ResizeSlabs(
13291333
template <size_t NumClasses>
13301334
void* TcmallocSlab<NumClasses>::Destroy(
13311335
absl::FunctionRef<void(void*, size_t, std::align_val_t)> free) {
1332-
free(stopped_, sizeof(stopped_[0]) * NumCPUs(),
1336+
free(stopped_, sizeof(stopped_[0]) * num_cpus_,
13331337
std::align_val_t{ABSL_CACHELINE_SIZE});
13341338
stopped_ = nullptr;
13351339
free(begins_, sizeof(begins_[0]) * NumClasses,
13361340
std::align_val_t{ABSL_CACHELINE_SIZE});
13371341
begins_ = nullptr;
13381342
const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed);
1339-
free(slabs, GetSlabsAllocSize(shift, NumCPUs()), kPhysicalPageAlign);
1343+
free(slabs, GetSlabsAllocSize(shift, num_cpus_), kPhysicalPageAlign);
13401344
slabs_and_shift_.store({nullptr, shift}, std::memory_order_relaxed);
13411345
return slabs;
13421346
}
@@ -1396,15 +1400,15 @@ void TcmallocSlab<NumClasses>::Drain(int cpu, DrainHandler drain_handler) {
13961400

13971401
template <size_t NumClasses>
13981402
void TcmallocSlab<NumClasses>::StopCpu(int cpu) {
1399-
TC_ASSERT(cpu >= 0 && cpu < NumCPUs(), "cpu=%d", cpu);
1403+
TC_ASSERT(cpu >= 0 && cpu < num_cpus_, "cpu=%d", cpu);
14001404
TC_CHECK(!stopped_[cpu].load(std::memory_order_relaxed));
14011405
stopped_[cpu].store(true, std::memory_order_relaxed);
14021406
FenceCpu(cpu);
14031407
}
14041408

14051409
template <size_t NumClasses>
14061410
void TcmallocSlab<NumClasses>::StartCpu(int cpu) {
1407-
TC_ASSERT(cpu >= 0 && cpu < NumCPUs(), "cpu=%d", cpu);
1411+
TC_ASSERT(cpu >= 0 && cpu < num_cpus_, "cpu=%d", cpu);
14081412
TC_ASSERT(stopped_[cpu].load(std::memory_order_relaxed));
14091413
stopped_[cpu].store(false, std::memory_order_release);
14101414
}
@@ -1413,8 +1417,8 @@ template <size_t NumClasses>
14131417
PerCPUMetadataState TcmallocSlab<NumClasses>::MetadataMemoryUsage() const {
14141418
PerCPUMetadataState result;
14151419
const auto [slabs, shift] = GetSlabsAndShift(std::memory_order_relaxed);
1416-
size_t slabs_size = GetSlabsAllocSize(shift, NumCPUs());
1417-
size_t stopped_size = NumCPUs() * sizeof(stopped_[0]);
1420+
size_t slabs_size = GetSlabsAllocSize(shift, num_cpus_);
1421+
size_t stopped_size = num_cpus_ * sizeof(stopped_[0]);
14181422
size_t begins_size = NumClasses * sizeof(begins_[0]);
14191423
result.virtual_size = stopped_size + slabs_size + begins_size;
14201424
result.resident_size = MInCore::residence(slabs, slabs_size);

0 commit comments

Comments (0)