Skip to content

Commit 44b7f00

Browse files
common: cputrace HW_ctx was not cleared after use and perf counters were not stopped
Signed-off-by: Jaya Prakash <[email protected]>
1 parent 6e05fe9 commit 44b7f00

File tree

2 files changed

+97
-65
lines changed

2 files changed

+97
-65
lines changed

src/common/cputrace.cc

Lines changed: 68 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010
#include <stdlib.h>
1111
#include <stdio.h>
1212
#include <string.h>
13+
#include <cstring>
1314
#include <thread>
1415

1516
#define PROFILE_ASSERT(x) if (!(x)) { fprintf(stderr, "Assert failed %s:%d\n", __FILE__, __LINE__); exit(1); }
1617

1718
static thread_local uint64_t thread_id_hash;
1819
static thread_local bool thread_id_initialized;
1920
static cputrace_profiler g_profiler;
20-
static HW_ctx* active_contexts[CPUTRACE_MAX_ANCHORS][CPUTRACE_MAX_THREADS] = {{nullptr}};
2121

2222
struct read_format {
2323
uint64_t nr;
@@ -62,7 +62,7 @@ static void open_perf_fd(int& fd, uint64_t& id, struct perf_event_attr* pe, cons
6262
if (fd != -1) {
6363
ioctl(fd, PERF_EVENT_IOC_ID, &id);
6464
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
65-
} else {
65+
} else {
6666
fprintf(stderr, "Failed to open perf event for %s: %s\n", name, strerror(errno));
6767
id = 0;
6868
}
@@ -164,31 +164,22 @@ void HW_read(HW_ctx* ctx, sample_t* measure) {
164164
}
165165
}
166166

167-
static void read_perf_event(HW_ctx* ctx, cputrace_anchor* anchor) {
168-
pthread_mutex_lock(&anchor->lock);
169-
if (ctx->parent_fd != -1) {
170-
sample_t measure;
171-
HW_read(ctx, &measure);
172-
if (measure.swi) {
173-
anchor->global_results.swi += measure.swi;
174-
}
175-
if (measure.cyc) {
176-
anchor->global_results.cyc += measure.cyc;
177-
}
178-
if (measure.cmiss) {
179-
anchor->global_results.cmiss += measure.cmiss;
180-
}
181-
if (measure.bmiss) {
182-
anchor->global_results.bmiss += measure.bmiss;
183-
}
184-
if (measure.ins) {
185-
anchor->global_results.ins += measure.ins;
186-
}
187-
if (ioctl(ctx->parent_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) != 0) {
188-
fprintf(stderr, "Failed to reset perf counters: %s\n", strerror(errno));
189-
}
167+
static void collect_samples(sample_t* start, sample_t* end, cputrace_anchor* anchor) {
168+
if (end->swi) {
169+
anchor->global_results.swi += end->swi - start->swi;
170+
}
171+
if (end->cyc) {
172+
anchor->global_results.cyc += end->cyc - start->cyc;
173+
}
174+
if (end->cmiss) {
175+
anchor->global_results.cmiss += end->cmiss - start->cmiss;
176+
}
177+
if (end->bmiss) {
178+
anchor->global_results.bmiss += end->bmiss - start->bmiss;
179+
}
180+
if (end->ins) {
181+
anchor->global_results.ins += end->ins - start->ins;
190182
}
191-
pthread_mutex_unlock(&anchor->lock);
192183
}
193184

194185
HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags)
@@ -199,31 +190,44 @@ HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags)
199190
uint64_t tid = get_thread_id();
200191

201192
cputrace_anchor& anchor = g_profiler.anchors[index];
193+
pthread_mutex_lock(&anchor.lock);
202194
anchor.name = function;
203195
anchor.flags = flags;
204196

205-
pthread_mutex_lock(&anchor.lock);
206197
anchor.global_results.call_count += 1;
207-
pthread_mutex_unlock(&anchor.lock);
208-
209-
ctx = HW_ctx_empty;
210-
HW_init(&ctx, flags);
211198

212-
pthread_mutex_lock(&g_profiler.global_lock);
213-
active_contexts[index][tid] = &ctx;
214-
pthread_mutex_unlock(&g_profiler.global_lock);
199+
if (anchor.active_contexts[tid] == nullptr) {
200+
ctx = &anchor.per_thread_ctx[tid];
201+
*ctx = HW_ctx_empty;
202+
HW_init(ctx, flags);
203+
anchor.active_contexts[tid] = ctx;
204+
} else {
205+
ctx = anchor.active_contexts[tid];
206+
}
207+
anchor.nest_level[tid]++;
208+
if (anchor.nest_level[tid] == 1) {
209+
HW_read(ctx, &anchor.start[tid]);
210+
}
211+
anchor.is_capturing[tid] = true;
212+
pthread_mutex_unlock(&anchor.lock);
215213
}
216214

217215
HW_profile::~HW_profile() {
218216
if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS)
219217
return;
220218

219+
cputrace_anchor& anchor = g_profiler.anchors[index];
221220
uint64_t tid = get_thread_id();
222-
pthread_mutex_lock(&g_profiler.global_lock);
223-
read_perf_event(&ctx, &g_profiler.anchors[index]);
224-
HW_clean(&ctx);
225-
active_contexts[index][tid] = nullptr;
226-
pthread_mutex_unlock(&g_profiler.global_lock);
221+
222+
pthread_mutex_lock(&anchor.lock);
223+
anchor.nest_level[tid]--;
224+
if (anchor.nest_level[tid] == 0) {
225+
HW_read(ctx, &anchor.end[tid]);
226+
collect_samples(&anchor.start[tid], &anchor.end[tid], &anchor);
227+
std::memcpy(&anchor.start[tid], &anchor.end[tid], sizeof(anchor.start[tid]));
228+
anchor.is_capturing[tid] = false;
229+
}
230+
pthread_mutex_unlock(&anchor.lock);
227231
}
228232

229233
void cputrace_start() {
@@ -309,16 +313,20 @@ void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::str
309313
bool dumped = false;
310314

311315
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
312-
const auto& anchor = g_profiler.anchors[i];
316+
cputrace_anchor& anchor = g_profiler.anchors[i];
313317
if (!anchor.name || (!logger.empty() && anchor.name != logger)) {
314318
continue;
315319
}
316320

321+
pthread_mutex_lock(&anchor.lock);
317322
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
318-
if (active_contexts[i][j]) {
319-
read_perf_event(active_contexts[i][j], &g_profiler.anchors[i]);
323+
if (anchor.is_capturing[j]) {
324+
HW_read(anchor.active_contexts[j], &anchor.end[j]);
325+
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
326+
std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
320327
}
321328
}
329+
pthread_mutex_unlock(&anchor.lock);
322330

323331
f->open_object_section(anchor.name);
324332
f->dump_unsigned("call_count", anchor.global_results.call_count);
@@ -363,16 +371,20 @@ void cputrace_print_to_stringstream(std::stringstream& ss) {
363371
bool dumped = false;
364372

365373
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
366-
const auto& anchor = g_profiler.anchors[i];
374+
cputrace_anchor& anchor = g_profiler.anchors[i];
367375
if (!anchor.name) {
368376
continue;
369377
}
370378

379+
pthread_mutex_lock(&anchor.lock);
371380
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
372-
if (active_contexts[i][j]) {
373-
read_perf_event(active_contexts[i][j], &g_profiler.anchors[i]);
381+
if (anchor.is_capturing[j]) {
382+
HW_read(anchor.active_contexts[j], &anchor.end[j]);
383+
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
384+
std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
374385
}
375386
}
387+
pthread_mutex_unlock(&anchor.lock);
376388

377389
ss << " " << anchor.name << ":\n";
378390
ss << " call_count: " << anchor.global_results.call_count << "\n";
@@ -430,6 +442,7 @@ __attribute__((constructor)) static void cputrace_init() {
430442
fprintf(stderr, "Failed to initialize mutex for anchor %d: %s\n", i, strerror(errno));
431443
exit(1);
432444
}
445+
433446
}
434447
if (pthread_mutex_init(&g_profiler.global_lock, nullptr) != 0) {
435448
fprintf(stderr, "Failed to initialize global mutex: %s\n", strerror(errno));
@@ -439,6 +452,16 @@ __attribute__((constructor)) static void cputrace_init() {
439452

440453
__attribute__((destructor)) static void cputrace_fini() {
441454
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
455+
cputrace_anchor& anchor = g_profiler.anchors[i];
456+
pthread_mutex_lock(&anchor.lock);
457+
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
458+
if (anchor.active_contexts[j] != nullptr) {
459+
HW_clean(&anchor.per_thread_ctx[j]);
460+
anchor.active_contexts[j] = nullptr;
461+
anchor.is_capturing[j] = false;
462+
}
463+
}
464+
pthread_mutex_unlock(&anchor.lock);
442465
if (pthread_mutex_destroy(&g_profiler.anchors[i].lock) != 0) {
443466
fprintf(stderr, "Failed to destroy mutex for anchor %d: %s\n", i, strerror(errno));
444467
}

src/common/cputrace.h

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
#pragma once
1+
#define CPUTRACE_H
2+
#ifdef CPUTRACE_H
3+
24
#include <pthread.h>
35
#include <stdint.h>
46
#include <string>
@@ -26,17 +28,12 @@ struct results {
2628
uint64_t ins;
2729
};
2830

29-
struct cputrace_anchor {
30-
const char* name;
31-
pthread_mutex_t lock;
32-
results global_results;
33-
uint64_t flags;
34-
};
35-
36-
struct cputrace_profiler {
37-
cputrace_anchor* anchors;
38-
bool profiling;
39-
pthread_mutex_t global_lock;
31+
struct sample_t {
32+
uint64_t swi = 0;
33+
uint64_t cyc = 0;
34+
uint64_t cmiss = 0;
35+
uint64_t bmiss = 0;
36+
uint64_t ins = 0;
4037
};
4138

4239
struct HW_ctx {
@@ -58,14 +55,24 @@ constexpr HW_ctx HW_ctx_empty = {
5855
0, 0, 0, 0, 0
5956
};
6057

61-
struct sample_t {
62-
uint64_t swi = 0;
63-
uint64_t cyc = 0;
64-
uint64_t cmiss = 0;
65-
uint64_t bmiss = 0;
66-
uint64_t ins = 0;
58+
struct cputrace_anchor {
59+
const char* name;
60+
pthread_mutex_t lock;
61+
results global_results;
62+
uint64_t flags;
63+
HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS];
64+
HW_ctx* active_contexts[CPUTRACE_MAX_THREADS] = {nullptr};
65+
sample_t start[CPUTRACE_MAX_THREADS];
66+
sample_t end[CPUTRACE_MAX_THREADS];
67+
bool is_capturing[CPUTRACE_MAX_THREADS] = {false};
68+
uint32_t nest_level[CPUTRACE_MAX_THREADS] = {0};
6769
};
6870

71+
struct cputrace_profiler {
72+
cputrace_anchor* anchors;
73+
bool profiling;
74+
pthread_mutex_t global_lock;
75+
};
6976

7077
class HW_profile {
7178
public:
@@ -76,7 +83,7 @@ class HW_profile {
7683
const char* function;
7784
uint64_t index;
7885
uint64_t flags;
79-
struct HW_ctx ctx;
86+
struct HW_ctx* ctx;
8087
};
8188

8289
void HW_init(HW_ctx* ctx, uint64_t flags);
@@ -90,4 +97,6 @@ void cputrace_start(ceph::Formatter* f);
9097
void cputrace_stop(ceph::Formatter* f);
9198
void cputrace_reset(ceph::Formatter* f);
9299
void cputrace_dump(ceph::Formatter* f, const std::string& logger = "", const std::string& counter = "");
93-
void cputrace_print_to_stringstream(std::stringstream& ss);
100+
void cputrace_print_to_stringstream(std::stringstream& ss);
101+
102+
#endif

0 commit comments

Comments
 (0)