Skip to content

Commit 6b84ba6

Browse files
authored
Merge pull request #46606 from Dr15Jones/fixModuleAllocMonitor
Remove use of thread_local from ModuleAllocMonitor
2 parents 51ce6f2 + 1f1fdcd commit 6b84ba6

File tree

2 files changed

+123
-2
lines changed

2 files changed

+123
-2
lines changed

PerfTools/AllocMonitor/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ The monitor is owned by the registry and should not be deleted by any other code
2929
of the monitor, one can call `cms::perftools::AllocMonitorRegistry::deregisterMonitor` to have the monitor removed from
3030
the callback list and be deleted (again, without the deallocation causing any callbacks).
3131

32+
NOTE: Experience has shown that using `thread_local` within a call to `allocCalled` or `deallocCalled` can lead to unexpected behavior. Therefore, if per-thread information must be gathered, it is recommended to build a mechanism that is keyed on thread ids instead.
33+
An example of such code can be found in the implementation of ModuleAllocMonitor.
34+
3235
## General usage
3336

3437
To use the facility, one needs to use LD_PRELOAD to load in the memory proxies before the application runs, e.g.
@@ -99,3 +102,16 @@ The output file contains the following information on each line
99102
- Number of calls made to deallocation functions
100103

101104
This service is multi-thread safe. Note that when run multi-threaded the maximum reported value will vary from job to job.
105+
106+
### ModuleAllocMonitor
107+
This service registers a monitor when the service is created (after python parsing is finished but before any modules
108+
have been loaded into cmsRun) and writes module related information to the specified file. The file name, an optional
109+
list of module names, and an optional number of initial events to skip are specified by setting parameters of the
110+
service in the configuration. The parameters are
111+
- filename: name of file to which to write reports
112+
- moduleNames: list of modules which should have their information added to the file. An empty list means that all modules are included.
113+
- nEventsToSkip: the number of initial events that must be processed before reporting begins.
114+
115+
The beginning of the file contains a description of the structure and contents of the file.
116+
117+
This service is multi-thread safe.

PerfTools/AllocMonitor/plugins/ModuleAllocMonitor.cc

Lines changed: 107 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,115 @@
2323
#include "FWCore/ServiceRegistry/interface/ModuleCallingContext.h"
2424
#include "DataFormats/Provenance/interface/ModuleDescription.h"
2525

26+
#if defined(ALLOC_USE_PTHREADS)
27+
#include <pthread.h>
28+
#else
29+
#include <unistd.h>
30+
#include <sys/syscall.h>
31+
#endif
32+
2633
#include "moduleAlloc_setupFile.h"
2734
#include "ThreadAllocInfo.h"
2835

2936
namespace {
37+
// Returns an identifier for the calling thread.
//
// With ALLOC_USE_PTHREADS defined this is pthread_self(); otherwise it is the
// value returned by the gettid system call. The return type depends on which
// branch is compiled, so users should spell it as decltype(thread_id()).
inline auto thread_id() {
#if defined(ALLOC_USE_PTHREADS)
  /*NOTE: if use pthread_self, the values returned by linux had
     lots of hash collisions when using a simple % hash. Worked
     better if first divided value by 0x700 and then did %.
     [test done on el8]
  */
  return pthread_self();
#else
  return syscall(SYS_gettid);
#endif
}

// Assigns each calling thread an index in [0, kTotalEntries) using only
// atomic loads and compare-exchange operations.
//
// A thread first tries the slot of `hashed_threads_` selected by hashing its
// id. If that slot is already owned by a different thread, the thread is given
// (or finds) a slot in `extra_threads_`, which is searched linearly; indices
// for that table are offset by kHashedEntries. Slots are never released, and
// the process is aborted once `extra_threads_` has no free slot left.
struct ThreadTracker {
  static constexpr unsigned int kHashedEntries = 128;
  static constexpr unsigned int kExtraEntries = 128;
  static constexpr unsigned int kTotalEntries = kHashedEntries + kExtraEntries;
  using entry_type = decltype(thread_id());
  // Marks a free slot in extra_threads_; assumed never to be a real thread id.
  static constexpr entry_type kUnusedEntry = ~entry_type(0);
  std::array<std::atomic<entry_type>, kHashedEntries> hashed_threads_;
  std::array<std::atomic<entry_type>, kExtraEntries> extra_threads_;

  ThreadTracker() {
    //put a value which will not match the hash used when looking up the entry
    std::size_t index = 0;
    for (auto& v : hashed_threads_) {
      v = unclaimed_marker(index);
      ++index;
    }
    //assume kUnusedEntry is not a valid thread-id
    for (auto& v : extra_threads_) {
      v = kUnusedEntry;
    }
  }

  // Returns the index owned by the calling thread, claiming one on first use.
  // Repeated calls from the same thread return the same index.
  std::size_t thread_index() {
    auto id = thread_id();
    auto index = thread_index_guess(id);
    auto used_id = hashed_threads_[index].load();

    if (id == used_id) {
      return index;
    }
    //try to be first thread to grab the index
    auto expected = unclaimed_marker(index);
    if (used_id == expected) {
      if (hashed_threads_[index].compare_exchange_strong(expected, id)) {
        return index;
      }
      //another thread just beat us so have to go to non-hash storage.
      // This thread cannot already be in extra_threads_ (it would only be
      // there if the hashed slot had been taken earlier), so skip the search.
      return find_new_index(id);
    }
    //search in non-hash storage
    return find_index(id);
  }

private:
  // Placeholder stored in hashed slot `index` until a thread claims it.
  // It is chosen so that thread_index_guess(placeholder) != index, which
  // means no thread whose id hashes to `index` can be mistaken for it.
  static entry_type unclaimed_marker(std::size_t index) {
#if defined(ALLOC_USE_PTHREADS)
    // the hash divides by 0x700 first, so scale to land in slot (index + 1) % kHashedEntries
    return entry_type((index + 1) * 0x700);
#else
    return entry_type(index + 1);
#endif
  }

  // Hashes a thread id to a slot of hashed_threads_.
  // The id is converted to an unsigned value first so that a negative id
  // (entry_type is signed when it comes from syscall) cannot produce a
  // negative remainder and therefore an out-of-range index.
  std::size_t thread_index_guess(entry_type id) const {
    const auto key = static_cast<std::size_t>(id);
#if defined(ALLOC_USE_PTHREADS)
    return (key / 0x700) % kHashedEntries;
#else
    return key % kHashedEntries;
#endif
  }

  // Claims the first free slot of extra_threads_ for `id` and returns its
  // index (offset by kHashedEntries). Aborts if every slot is taken.
  std::size_t find_new_index(entry_type id) {
    std::size_t index = 0;
    for (auto& v : extra_threads_) {
      entry_type expected = kUnusedEntry;
      // cheap read first; only attempt the exchange on slots that look free
      if (v == expected) {
        if (v.compare_exchange_strong(expected, id)) {
          return index + kHashedEntries;
        }
      }
      ++index;
    }
    //failed to find an open entry
    abort();
    return 0;
  }

  // Looks for `id` in extra_threads_; if absent, claims a new slot for it.
  std::size_t find_index(entry_type id) {
    std::size_t index = 0;
    for (auto const& v : extra_threads_) {
      if (v == id) {
        return index + kHashedEntries;
      }
      ++index;
    }
    return find_new_index(id);
  }
};
129+
130+
// Gives access to the single ThreadTracker shared by every caller.
// The instance is a function-local static, so it is constructed on the
// first call and the same object is returned on every later call.
static ThreadTracker& getTracker() {
  static ThreadTracker tracker_instance;
  return tracker_instance;
}
134+
30135
using namespace edm::service::moduleAlloc;
31136
class MonitorAdaptor : public cms::perftools::AllocMonitorBase {
32137
public:
@@ -43,8 +148,8 @@ namespace {
43148

44149
private:
45150
// Returns the ThreadAllocInfo record belonging to the calling thread.
// In this diff the removed lines (46-, 47-) kept the record in a thread_local
// object; the added lines (151+, 152+) instead keep a function-local static
// array of ThreadTracker::kTotalEntries records and select the element with
// getTracker().thread_index().
static ThreadAllocInfo& threadAllocInfo() {
46-
thread_local ThreadAllocInfo s_info;
47-
return s_info;
151+
static ThreadAllocInfo s_info[ThreadTracker::kTotalEntries];
152+
return s_info[getTracker().thread_index()];
48153
}
49154
void allocCalled(size_t iRequested, size_t iActual, void const*) final {
50155
auto& allocInfo = threadAllocInfo();

0 commit comments

Comments
 (0)