Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/root-ci-config/buildconfig/global.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mathmore=ON
minuit2_omp=ON
roofit_multiprocess=ON
roottest=ON
tbb=ON
test_distrdf_dask=ON
test_distrdf_pyspark=ON
testing=ON
Expand Down
2 changes: 1 addition & 1 deletion core/thread/src/TRWMutexImp.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ template class TRWMutexImp<TMutex, ROOT::Internal::UniqueLockRecurseCount>;
template class TRWMutexImp<ROOT::TSpinMutex, ROOT::Internal::UniqueLockRecurseCount>;

#ifdef R__HAS_TBB
template class TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsTBB>;
template class TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsThreadLocal>;
template class TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsTBBUnique>;
#endif

Expand Down
2 changes: 1 addition & 1 deletion core/thread/src/TReentrantRWLock.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ template class TReentrantRWLock<TMutex, ROOT::Internal::UniqueLockRecurseCount>;
template class TReentrantRWLock<std::mutex, ROOT::Internal::UniqueLockRecurseCount>;

#ifdef R__HAS_TBB
template class TReentrantRWLock<std::mutex, ROOT::Internal::RecurseCountsTBB>;
template class TReentrantRWLock<std::mutex, ROOT::Internal::RecurseCountsThreadLocal>;
template class TReentrantRWLock<std::mutex, ROOT::Internal::RecurseCountsTBBUnique>;
#endif
}
48 changes: 44 additions & 4 deletions core/thread/src/TReentrantRWLock.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <atomic>
#include <condition_variable>
#include <thread>
#include <stdexcept>
#include <unordered_map>

#ifdef R__HAS_TBB
Expand Down Expand Up @@ -152,20 +153,40 @@ struct RecurseCounts {

};

#ifdef R__HAS_TBB
struct RecurseCountsTBB {
// This class is similar to RecurseCountsTBBUnique, but it doesn't use
// different TLS keys for each instance of this class - just like
// tbb::enumerable_thread_specific would do with tbb::ets_no_key.
//
// Instead of implementing the logic with TBB, this class implements it in
// standard C++, using a fixed-size, thread-local static array for the data.
// This allows for fast lookups, just as if using different TLS keys per
// instance (the alternative would have been the slower std::unordered_map).
//
// We can make this optimization because we know that only two instances of
// this class will ever be created: one for gCoreMutex, and one for testing.
//
// This way, we reach the performance of tbb::enumerable_thread_specific
// with tbb::ets_key_per_instance (as implemented in RecurseCountsTBBUnique),
// but without depending on TBB.
struct RecurseCountsThreadLocal {

RecurseCountsThreadLocal() : fId{nextId()} {}

using Hint_t = TVirtualRWMutex::Hint_t;

/// Bookkeeping record made of a reader count and a writer flag.
/// A default-constructed record has a zero count and a cleared flag.
struct LocalCounts {
   size_t fReadersCount{0}; ///< Reader count; starts at zero.
   bool fIsWriter{false};   ///< Writer flag; starts cleared.
};
tbb::enumerable_thread_specific<LocalCounts> fLocalCounts;
size_t fWriteRecurse = 0; ///<! Number of re-entry in the lock by the same thread.

using local_t = LocalCounts *;

local_t GetLocal() { return &fLocalCounts.local(); }
/// Return the calling thread's LocalCounts record for this instance.
///
/// Every thread owns one fixed-size table with nMaxInstances entries;
/// fId selects the entry belonging to this instance, so the lookup is a
/// plain array index.
local_t GetLocal()
{
   static thread_local std::array<LocalCounts, nMaxInstances> sPerThreadSlots;
   return sPerThreadSlots.data() + fId;
}

Hint_t *IncrementReadCount(local_t &local)
{
Expand Down Expand Up @@ -210,8 +231,27 @@ struct RecurseCountsTBB {
/// Clear the writer flag stored in the record that `local` points to.
void ResetIsWriter(local_t &local)
{
   local->fIsWriter = false;
}

/// Give mutable access to the reader count stored in the record that
/// `local` points to.
size_t &GetLocalReadersCount(local_t &local)
{
   return local->fReadersCount;
}

private:
// Upper bound on the number of instances that may ever be constructed:
// one for gCoreMutex, and one for testing in testRWLock.cxx.
// NOTE(review): testRWLock.cxx appears to create more than one lock object
// with this policy - confirm the bound still holds when gCoreMutex exists in
// the same process.
static constexpr std::size_t nMaxInstances = 2;

/// Hand out the next unused instance id (0, 1, ...).
///
/// Ids come from a process-wide atomic counter and are never reused.
/// \throws std::runtime_error once nMaxInstances ids have been handed out.
static size_t nextId()
{
   static std::atomic<size_t> sNextFreeId{0};
   const size_t id = sNextFreeId.fetch_add(1);
   if (id < nMaxInstances)
      return id;
   throw std::runtime_error("Maximum number of ROOT::Internal::RecurseCountsThreadLocal instances reached!");
}

size_t fId = 0;
};

#ifdef R__HAS_TBB
struct RecurseCountsTBBUnique {
using Hint_t = TVirtualRWMutex::Hint_t;

Expand Down
6 changes: 1 addition & 5 deletions core/thread/src/TThread.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -347,11 +347,7 @@ void TThread::Init()
if (!ROOT::gCoreMutex) {
// To avoid deadlocks caused by shared library opening and/or static initialization
// taking the same lock as 'tls_get_addr_tail', we cannot use UniqueLockRecurseCount.
#ifdef R__HAS_TBB
ROOT::gCoreMutex = new ROOT::TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsTBBUnique>();
#else
ROOT::gCoreMutex = new ROOT::TRWMutexImp<std::mutex, ROOT::Internal::RecurseCounts>();
#endif
ROOT::gCoreMutex = new ROOT::TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsThreadLocal>();
}
gInterpreterMutex = ROOT::gCoreMutex;
gROOTMutex = gInterpreterMutex;
Expand Down
66 changes: 33 additions & 33 deletions core/thread/test/testRWLock.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -250,15 +250,15 @@ auto gMutex = new TMutex(kTRUE);
auto gRWMutex = new TRWMutexImp<TMutex>();
auto gRWMutexSpin = new TRWMutexImp<ROOT::TSpinMutex>();
auto gRWMutexStd = new TRWMutexImp<std::mutex>();
auto gRWMutexStdThreadLocal = new TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsThreadLocal>();
#ifdef R__HAS_TBB
auto gRWMutexStdTBB = new TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsTBB>();
auto gRWMutexStdTBBUnique = new TRWMutexImp<std::mutex, ROOT::Internal::RecurseCountsTBBUnique>();
#endif
auto gReentrantRWMutex = new ROOT::TReentrantRWLock<TMutex>();
auto gReentrantRWMutexSM = new ROOT::TReentrantRWLock<ROOT::TSpinMutex>();
auto gReentrantRWMutexStd = new ROOT::TReentrantRWLock<std::mutex>();
auto gReentrantRWMutexStdThreadLocal = new ROOT::TReentrantRWLock<std::mutex, ROOT::Internal::RecurseCountsThreadLocal>();
#ifdef R__HAS_TBB
auto gReentrantRWMutexStdTBB = new ROOT::TReentrantRWLock<std::mutex, ROOT::Internal::RecurseCountsTBB>();
auto gReentrantRWMutexStdTBBUnique = new ROOT::TReentrantRWLock<std::mutex, ROOT::Internal::RecurseCountsTBBUnique>();
#endif
auto gSpinMutex = new ROOT::TSpinMutex();
Expand Down Expand Up @@ -335,17 +335,17 @@ TEST(RWLock, WriteStdDirectUnLock)
testWriteUnLock(gReentrantRWMutexStd, gRepetition, gWriteHint);
}

#ifdef R__HAS_TBB
TEST(RWLock, WriteStdTBBDirectLock)
TEST(RWLock, WriteStdThreadLocalDirectLock)
{
gWriteHint = testWriteLock(gReentrantRWMutexStdTBB, gRepetition);
gWriteHint = testWriteLock(gReentrantRWMutexStdThreadLocal, gRepetition);
}

TEST(RWLock, WriteStdTBBDirectUnLock)
TEST(RWLock, WriteStdThreadLocalDirectUnLock)
{
testWriteUnLock(gReentrantRWMutexStdTBB, gRepetition, gWriteHint);
testWriteUnLock(gReentrantRWMutexStdThreadLocal, gRepetition, gWriteHint);
}

#ifdef R__HAS_TBB
TEST(RWLock, WriteStdTBBUniqueDirectLock)
{
gWriteHint = testWriteLock(gReentrantRWMutexStdTBBUnique, gRepetition);
Expand Down Expand Up @@ -387,17 +387,17 @@ TEST(RWLock, ReadUnLockStdDirect)
testReadUnLock(gReentrantRWMutexStd, gRepetition, gReadHint);
}

#ifdef R__HAS_TBB
TEST(RWLock, ReadLockStdTBBDirect)
TEST(RWLock, ReadLockStdThreadLocalDirect)
{
gReadHint = testReadLock(gReentrantRWMutexStdTBB, gRepetition);
gReadHint = testReadLock(gReentrantRWMutexStdThreadLocal, gRepetition);
}

TEST(RWLock, ReadUnLockStdTBBDirect)
TEST(RWLock, ReadUnLockStdThreadLocalDirect)
{
testReadUnLock(gReentrantRWMutexStdTBB, gRepetition, gReadHint);
testReadUnLock(gReentrantRWMutexStdThreadLocal, gRepetition, gReadHint);
}

#ifdef R__HAS_TBB
TEST(RWLock, ReadLockStdTBBUniqueDirect)
{
gReadHint = testReadLock(gReentrantRWMutexStdTBBUnique, gRepetition);
Expand Down Expand Up @@ -494,12 +494,12 @@ TEST(RWLock, ReentrantStd)
Reentrant(*gReentrantRWMutexStd);
}

#ifdef R__HAS_TBB
TEST(RWLock, ReentrantStdTBB)
TEST(RWLock, ReentrantStdThreadLocal)
{
Reentrant(*gReentrantRWMutexStdTBB);
Reentrant(*gReentrantRWMutexStdThreadLocal);
}

#ifdef R__HAS_TBB
TEST(RWLock, ReentrantStdTBBUnique)
{
Reentrant(*gReentrantRWMutexStdTBBUnique);
Expand Down Expand Up @@ -531,12 +531,12 @@ TEST(RWLock, ResetRestoreStd)
ResetRestore(*gReentrantRWMutexStd);
}

#ifdef R__HAS_TBB
TEST(RWLock, ResetRestoreStdTBB)
TEST(RWLock, ResetRestoreStdThreadLocal)
{
ResetRestore(*gReentrantRWMutexStdTBB);
ResetRestore(*gReentrantRWMutexStdThreadLocal);
}

#ifdef R__HAS_TBB
TEST(RWLock, ResetRestoreStdTBBUnique)
{
ResetRestore(*gReentrantRWMutexStdTBBUnique);
Expand Down Expand Up @@ -579,12 +579,12 @@ TEST(RWLock, concurrentResetRestoreStd)
concurrentResetRestore(gRWMutexStd, 2, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, concurrentResetRestoreStdTBB)
TEST(RWLock, concurrentResetRestoreStdThreadLocal)
{
concurrentResetRestore(gRWMutexStdTBB, 2, gRepetition / 10000);
concurrentResetRestore(gRWMutexStdThreadLocal, 2, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, concurrentResetRestoreStdTBBUnique)
{
concurrentResetRestore(gRWMutexStdTBBUnique, 2, gRepetition / 10000);
Expand Down Expand Up @@ -629,12 +629,12 @@ TEST(RWLock, concurrentReadsAndWritesStd)
concurrentReadsAndWrites(gRWMutexStd, 1, 2, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, concurrentReadsAndWritesStdTBB)
TEST(RWLock, concurrentReadsAndWritesStdThreadLocal)
{
concurrentReadsAndWrites(gRWMutexStdTBB, 1, 2, gRepetition / 10000);
concurrentReadsAndWrites(gRWMutexStdThreadLocal, 1, 2, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, concurrentReadsAndWritesStdTBBUnique)
{
concurrentReadsAndWrites(gRWMutexStdTBBUnique, 1, 2, gRepetition / 10000);
Expand All @@ -651,12 +651,12 @@ TEST(RWLock, LargeconcurrentReadsAndWritesStd)
concurrentReadsAndWrites(gRWMutex, 10, 20, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, LargeconcurrentReadsAndWritesStdTBB)
TEST(RWLock, LargeconcurrentReadsAndWritesStdThreadLocal)
{
concurrentReadsAndWrites(gRWMutexStdTBB, 10, 20, gRepetition / 10000);
concurrentReadsAndWrites(gRWMutexStdThreadLocal, 10, 20, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, LargeconcurrentReadsAndWritesStdTBBUnique)
{
concurrentReadsAndWrites(gRWMutexStdTBBUnique, 10, 20, gRepetition / 10000);
Expand All @@ -678,12 +678,12 @@ TEST(RWLock, VeryLargeconcurrentReadsAndWritesStd)
concurrentReadsAndWrites(gRWMutexStd, 10, 200, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, VeryLargeconcurrentReadsAndWritesStdTBB)
TEST(RWLock, VeryLargeconcurrentReadsAndWritesStdThreadLocal)
{
concurrentReadsAndWrites(gRWMutexStdTBB, 10, 200, gRepetition / 10000);
concurrentReadsAndWrites(gRWMutexStdThreadLocal, 10, 200, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, VeryLargeconcurrentReadsAndWritesStdTBBUnique)
{
concurrentReadsAndWrites(gRWMutexStdTBBUnique, 10, 200, gRepetition / 10000);
Expand All @@ -706,12 +706,12 @@ TEST(RWLock, VeryLargeconcurrentReadsStd)
concurrentReadsAndWrites(gRWMutexStd, 0, 200, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, VeryLargeconcurrentReadsStdTBB)
TEST(RWLock, VeryLargeconcurrentReadsStdThreadLocal)
{
concurrentReadsAndWrites(gRWMutexStdTBB, 0, 200, gRepetition / 10000);
concurrentReadsAndWrites(gRWMutexStdThreadLocal, 0, 200, gRepetition / 10000);
}

#ifdef R__HAS_TBB
TEST(RWLock, VeryLargeconcurrentReadsStdTBBUnique)
{
concurrentReadsAndWrites(gRWMutexStdTBBUnique, 0, 200, gRepetition / 10000);
Expand Down
Loading