Skip to content

Commit d30b1e7

Browse files
common: cputrace global anchor IDs and unique per-thread assignment
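Updated the `HWProfileFunctionF` macro to obtain its anchor ID from a global anchor registry (`register_anchor`, backed by `name_to_id` / `next_id`) so that IDs are unique across translation units. Replaced the simple `thread_local` hash of the pthread ID with a mutex-protected, sequentially assigned per-thread ID (wrapped modulo `CPUTRACE_MAX_THREADS`). Signed-off-by: Jaya Prakash <[email protected]>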
1 parent 2491a6c commit d30b1e7

File tree

2 files changed

+83
-103
lines changed

2 files changed

+83
-103
lines changed

src/common/cputrace.cc

Lines changed: 69 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -26,11 +26,24 @@
2626

2727
#define PROFILE_ASSERT(x) if (!(x)) { fprintf(stderr, "Assert failed %s:%d\n", __FILE__, __LINE__); exit(1); }
2828

29-
static thread_local uint64_t thread_id_hash;
30-
static thread_local bool thread_id_initialized;
29+
static int thread_next_id = 0;
30+
static std::mutex thread_id_mtx;
31+
static thread_local int thread_id_local = -1;
3132
static cputrace_profiler g_profiler;
3233
static std::unordered_map<std::string, measurement_t> g_named_measurements;
3334
static std::mutex g_named_measurements_lock;
35+
static std::unordered_map<std::string, int> name_to_id;
36+
static int next_id = 0;
37+
static std::mutex name_id_mtx;
38+
39+
int register_anchor(const char* name) {
40+
std::lock_guard<std::mutex> lock(name_id_mtx);
41+
auto it = name_to_id.find(name);
42+
ceph_assert(it == name_to_id.end());
43+
int id = next_id++;
44+
name_to_id[name] = id;
45+
return id;
46+
}
3447

3548
struct read_format {
3649
uint64_t nr;
@@ -45,15 +58,12 @@ static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid,
4558
return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
4659
}
4760

48-
static uint64_t get_thread_id() {
49-
if (!thread_id_initialized) {
50-
uint64_t tid = pthread_self();
51-
for (int i = 0; i < 8; i++)
52-
tid = (tid << 7) ^ (tid >> 3);
53-
thread_id_hash = tid % CPUTRACE_MAX_THREADS;
54-
thread_id_initialized = true;
61+
inline int get_thread_id() {
62+
if (thread_id_local == -1) {
63+
std::lock_guard<std::mutex> lck(thread_id_mtx);
64+
thread_id_local = thread_next_id++ % CPUTRACE_MAX_THREADS;
5565
}
56-
return thread_id_hash;
66+
return thread_id_local;
5767
}
5868

5969
static void setup_perf_event(struct perf_event_attr* pe, uint32_t type, uint64_t config) {
@@ -155,7 +165,6 @@ void HW_clean(HW_ctx* ctx) {
155165
close_perf_fd(ctx->fd_cmiss);
156166
close_perf_fd(ctx->fd_bmiss);
157167
close_perf_fd(ctx->fd_ins);
158-
close_perf_fd(ctx->parent_fd);
159168
}
160169

161170
void HW_read(HW_ctx* ctx, sample_t* measure) {
@@ -188,12 +197,10 @@ static void collect_samples(sample_t* start, sample_t* end, cputrace_anchor* anc
188197

189198
HW_profile::HW_profile(const char* function, uint64_t index, cputrace_flags flags)
190199
: function(function), index(index), flags(flags) {
191-
pthread_mutex_lock(&g_profiler.global_lock);
192-
if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling) {
193-
pthread_mutex_unlock(&g_profiler.global_lock);
200+
if (!g_profiler.profiling.load()) {
194201
return;
195202
}
196-
pthread_mutex_unlock(&g_profiler.global_lock);
203+
ceph_assert(index < CPUTRACE_MAX_ANCHORS);
197204
uint64_t tid = get_thread_id();
198205
cputrace_anchor& anchor = g_profiler.anchors[index];
199206
pthread_mutex_lock(&anchor.lock);
@@ -219,17 +226,12 @@ HW_profile::HW_profile(const char* function, uint64_t index, cputrace_flags flag
219226
}
220227

221228
HW_profile::~HW_profile() {
222-
cputrace_anchor& anchor = g_profiler.anchors[index];
223-
uint64_t tid = get_thread_id();
224-
pthread_mutex_lock(&g_profiler.global_lock);
225-
if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS){
226-
pthread_mutex_lock(&anchor.lock);
227-
anchor.is_capturing[tid] = false;
228-
pthread_mutex_unlock(&anchor.lock);
229-
pthread_mutex_unlock(&g_profiler.global_lock);
229+
if (!g_profiler.profiling.load()) {
230230
return;
231231
}
232-
pthread_mutex_unlock(&g_profiler.global_lock);
232+
ceph_assert(index < CPUTRACE_MAX_ANCHORS);
233+
cputrace_anchor& anchor = g_profiler.anchors[index];
234+
uint64_t tid = get_thread_id();
233235
pthread_mutex_lock(&anchor.lock);
234236
anchor.nest_level[tid]--;
235237
if (anchor.nest_level[tid] == 0) {
@@ -247,95 +249,79 @@ measurement_t* get_named_measurement(const std::string& name) {
247249
}
248250

249251
HW_named_guard::HW_named_guard(const char* name, HW_ctx* ctx)
250-
: name(name)
252+
: name(name),
253+
guard(ctx, get_named_measurement(name))
251254
{
252-
measurement_t* meas = get_named_measurement(name);
253-
guard = new HW_guard(ctx, meas);
254255
}
255256

256257
HW_named_guard::~HW_named_guard() {
257-
delete guard;
258258
}
259259

260-
void cputrace_start() {
261-
pthread_mutex_lock(&g_profiler.global_lock);
262-
if (g_profiler.profiling) {
263-
pthread_mutex_unlock(&g_profiler.global_lock);
260+
void cputrace_start(ceph::Formatter* f) {
261+
if (g_profiler.profiling.load()) {
262+
if (f) {
263+
f->open_object_section("cputrace_start");
264+
f->dump_format("status", "Profiling already active");
265+
f->close_section();
266+
}
264267
return;
265268
}
266269
g_profiler.profiling = true;
267-
pthread_mutex_unlock(&g_profiler.global_lock);
268-
}
269-
270-
void cputrace_start(ceph::Formatter* f) {
271-
pthread_mutex_lock(&g_profiler.global_lock);
272-
if (g_profiler.profiling) {
270+
if (f) {
273271
f->open_object_section("cputrace_start");
274-
f->dump_format("status", "Profiling already active");
272+
f->dump_format("status", "Profiling started");
275273
f->close_section();
276-
pthread_mutex_unlock(&g_profiler.global_lock);
277-
return;
278274
}
279-
g_profiler.profiling = true;
280-
f->open_object_section("cputrace_start");
281-
f->dump_format("status", "Profiling started");
282-
f->close_section();
283-
pthread_mutex_unlock(&g_profiler.global_lock);
284275
}
285276

286-
void cputrace_stop() {
287-
pthread_mutex_lock(&g_profiler.global_lock);
288-
if (!g_profiler.profiling) {
289-
pthread_mutex_unlock(&g_profiler.global_lock);
277+
void cputrace_stop(ceph::Formatter* f) {
278+
if (!g_profiler.profiling.load()) {
279+
if (f) {
280+
f->open_object_section("cputrace_stop");
281+
f->dump_format("status", "Profiling not active");
282+
f->close_section();
283+
}
290284
return;
291285
}
286+
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
287+
cputrace_anchor& anchor = g_profiler.anchors[i];
288+
if (!anchor.name) {
289+
continue;
290+
}
291+
pthread_mutex_lock(&anchor.lock);
292+
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
293+
if (anchor.is_capturing[j]) {
294+
HW_read(anchor.active_contexts[j], &anchor.end[j]);
295+
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
296+
anchor.start[j] = anchor.end[j];
297+
anchor.is_capturing[j] = false;
298+
}
299+
}
300+
pthread_mutex_unlock(&anchor.lock);
301+
}
292302
g_profiler.profiling = false;
293-
pthread_mutex_unlock(&g_profiler.global_lock);
294-
}
295-
296-
void cputrace_stop(ceph::Formatter* f) {
297-
pthread_mutex_lock(&g_profiler.global_lock);
298-
if (!g_profiler.profiling) {
303+
if (f) {
299304
f->open_object_section("cputrace_stop");
300-
f->dump_format("status", "Profiling not active");
305+
f->dump_format("status", "Profiling stopped");
301306
f->close_section();
302-
pthread_mutex_unlock(&g_profiler.global_lock);
303-
return;
304307
}
305-
g_profiler.profiling = false;
306-
pthread_mutex_unlock(&g_profiler.global_lock);
307-
f->open_object_section("cputrace_stop");
308-
f->dump_format("status", "Profiling stopped");
309-
f->close_section();
310-
}
311-
312-
void cputrace_reset() {
313-
pthread_mutex_lock(&g_profiler.global_lock);
314-
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
315-
if (!g_profiler.anchors[i].name) continue;
316-
pthread_mutex_lock(&g_profiler.anchors[i].lock);
317-
g_profiler.anchors[i].global_results.reset();
318-
pthread_mutex_unlock(&g_profiler.anchors[i].lock);
319-
}
320-
pthread_mutex_unlock(&g_profiler.global_lock);
321308
}
322309

323310
void cputrace_reset(ceph::Formatter* f) {
324-
pthread_mutex_lock(&g_profiler.global_lock);
325311
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
326312
if (!g_profiler.anchors[i].name) continue;
327313
pthread_mutex_lock(&g_profiler.anchors[i].lock);
328314
g_profiler.anchors[i].global_results.reset();
329315
pthread_mutex_unlock(&g_profiler.anchors[i].lock);
330316
}
331-
f->open_object_section("cputrace_reset");
332-
f->dump_format("status", "Counters reset");
333-
f->close_section();
334-
pthread_mutex_unlock(&g_profiler.global_lock);
317+
if (f) {
318+
f->open_object_section("cputrace_reset");
319+
f->dump_format("status", "Counters reset");
320+
f->close_section();
321+
}
335322
}
336323

337324
void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::string& counter) {
338-
pthread_mutex_lock(&g_profiler.global_lock);
339325
f->open_object_section("cputrace");
340326
bool dumped = false;
341327

@@ -347,7 +333,7 @@ void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::str
347333

348334
pthread_mutex_lock(&anchor.lock);
349335
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
350-
if (anchor.is_capturing[j] && g_profiler.profiling) {
336+
if (anchor.is_capturing[j] && g_profiler.profiling.load()) {
351337
HW_read(anchor.active_contexts[j], &anchor.end[j]);
352338
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
353339
anchor.start[j] = anchor.end[j];
@@ -363,11 +349,9 @@ void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::str
363349

364350
f->dump_format("status", dumped ? "Profiling data dumped" : "No profiling data available");
365351
f->close_section();
366-
pthread_mutex_unlock(&g_profiler.global_lock);
367352
}
368353

369354
void cputrace_print_to_stringstream(std::stringstream& ss) {
370-
pthread_mutex_lock(&g_profiler.global_lock);
371355
ss << "cputrace:\n";
372356
bool dumped = false;
373357

@@ -379,7 +363,7 @@ void cputrace_print_to_stringstream(std::stringstream& ss) {
379363

380364
pthread_mutex_lock(&anchor.lock);
381365
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
382-
if (anchor.is_capturing[j]) {
366+
if (anchor.is_capturing[j] && g_profiler.profiling.load()) {
383367
HW_read(anchor.active_contexts[j], &anchor.end[j]);
384368
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
385369
anchor.start[j] = anchor.end[j];
@@ -393,7 +377,6 @@ void cputrace_print_to_stringstream(std::stringstream& ss) {
393377
}
394378

395379
ss << "status: " << (dumped ? "Profiling data dumped" : "No profiling data available") << "\n";
396-
pthread_mutex_unlock(&g_profiler.global_lock);
397380
}
398381

399382
__attribute__((constructor)) static void cputrace_init() {
@@ -409,10 +392,6 @@ __attribute__((constructor)) static void cputrace_init() {
409392
}
410393

411394
}
412-
if (pthread_mutex_init(&g_profiler.global_lock, nullptr) != 0) {
413-
fprintf(stderr, "Failed to initialize global mutex: %s\n", strerror(errno));
414-
exit(1);
415-
}
416395
}
417396

418397
__attribute__((destructor)) static void cputrace_fini() {
@@ -431,9 +410,6 @@ __attribute__((destructor)) static void cputrace_fini() {
431410
fprintf(stderr, "Failed to destroy mutex for anchor %d: %s\n", i, strerror(errno));
432411
}
433412
}
434-
if (pthread_mutex_destroy(&g_profiler.global_lock) != 0) {
435-
fprintf(stderr, "Failed to destroy global mutex: %s\n", strerror(errno));
436-
}
437413
free(g_profiler.anchors);
438414
g_profiler.anchors = nullptr;
439415
}

src/common/cputrace.h

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,11 @@
1717
#include <string>
1818
#include <unordered_map>
1919
#include <mutex>
20+
#include <sstream>
21+
#include <atomic>
22+
2023
#include "common/Formatter.h"
24+
#include "include/ceph_assert.h"
2125

2226
#define CPUTRACE_MAX_ANCHORS 10
2327
#define CPUTRACE_MAX_THREADS 64
@@ -40,7 +44,11 @@ inline cputrace_flags operator&(cputrace_flags a, cputrace_flags b) {
4044
static_cast<uint64_t>(a) & static_cast<uint64_t>(b));
4145
}
4246

43-
#define HWProfileFunctionF(var, name, flags) HW_profile var(name, __COUNTER__ + 1, flags)
47+
int register_anchor(const char* name);
48+
49+
#define HWProfileFunctionF(var, name, flags) \
50+
static int var##_id = register_anchor(name); \
51+
HW_profile var(name, var##_id, flags)
4452

4553
struct sample_t {
4654
uint64_t swi = 0;
@@ -201,8 +209,7 @@ struct cputrace_anchor {
201209

202210
struct cputrace_profiler {
203211
cputrace_anchor* anchors = nullptr;
204-
bool profiling = false;
205-
pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
212+
std::atomic<bool> profiling{false};
206213
};
207214

208215
class HW_profile {
@@ -249,17 +256,14 @@ class HW_named_guard {
249256

250257
private:
251258
const char* name = nullptr;
252-
HW_guard* guard{nullptr};
259+
HW_guard guard;
253260
};
254261

255262
measurement_t* get_named_measurement(const std::string& name);
256263

257-
void cputrace_start();
258-
void cputrace_stop();
259-
void cputrace_reset();
260-
void cputrace_start(ceph::Formatter* f);
261-
void cputrace_stop(ceph::Formatter* f);
262-
void cputrace_reset(ceph::Formatter* f);
264+
void cputrace_start(ceph::Formatter* f = nullptr);
265+
void cputrace_stop(ceph::Formatter* f = nullptr);
266+
void cputrace_reset(ceph::Formatter* f = nullptr);
263267
void cputrace_dump(ceph::Formatter* f, const std::string& logger = "", const std::string& counter = "");
264268
void cputrace_print_to_stringstream(std::stringstream& ss);
265269

0 commit comments

Comments
 (0)