Skip to content

Commit 2491a6c

Browse files
common: cputrace refactor results to measurement_t
Signed-off-by: Jaya Prakash <[email protected]>
1 parent 44b7f00 commit 2491a6c

File tree

2 files changed

+271
-142
lines changed

2 files changed

+271
-142
lines changed

src/common/cputrace.cc

Lines changed: 67 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
1+
/*
2+
* CpuTrace: lightweight hardware performance counter profiling
3+
*
4+
* Implementation details.
5+
*
6+
* See detailed documentation and usage examples in:
7+
* doc/dev/cputrace.rst
8+
*
9+
* This file contains the low-level implementation of CpuTrace,
10+
* including perf_event setup, context management, and RAII
11+
* profiling helpers.
12+
*/
13+
114
#include "cputrace.h"
2-
#include "common/Formatter.h"
315

416
#include <linux/perf_event.h>
517
#include <asm/unistd.h>
@@ -10,14 +22,15 @@
1022
#include <stdlib.h>
1123
#include <stdio.h>
1224
#include <string.h>
13-
#include <cstring>
1425
#include <thread>
1526

1627
#define PROFILE_ASSERT(x) if (!(x)) { fprintf(stderr, "Assert failed %s:%d\n", __FILE__, __LINE__); exit(1); }
1728

1829
static thread_local uint64_t thread_id_hash;
1930
static thread_local bool thread_id_initialized;
2031
static cputrace_profiler g_profiler;
32+
static std::unordered_map<std::string, measurement_t> g_named_measurements;
33+
static std::mutex g_named_measurements_lock;
2134

2235
struct read_format {
2336
uint64_t nr;
@@ -76,19 +89,27 @@ static void close_perf_fd(int& fd) {
7689
}
7790
}
7891

79-
void HW_init(HW_ctx* ctx, uint64_t flags) {
92+
HW_ctx HW_ctx_empty = {
93+
-1, -1, -1, -1, -1, -1,
94+
0, 0, 0, 0, 0
95+
};
96+
97+
void HW_init(HW_ctx* ctx, cputrace_flags flags) {
8098
struct perf_event_attr pe;
8199
int parent_fd = -1;
82100

83101
if (flags & HW_PROFILE_SWI) {
84102
setup_perf_event(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES);
85103
open_perf_fd(ctx->fd_swi, ctx->id_swi, &pe, "SWI", -1);
86104
parent_fd = ctx->fd_swi;
87-
}
88-
else if (flags & HW_PROFILE_CYC) {
105+
} else if (flags & HW_PROFILE_CYC) {
89106
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
90107
open_perf_fd(ctx->fd_cyc, ctx->id_cyc, &pe, "CYC", -1);
91108
parent_fd = ctx->fd_cyc;
109+
} else if (flags & HW_PROFILE_INS) {
110+
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
111+
open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1);
112+
parent_fd = ctx->fd_ins;
92113
} else if (flags & HW_PROFILE_CMISS) {
93114
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES);
94115
open_perf_fd(ctx->fd_cmiss, ctx->id_cmiss, &pe, "CMISS", -1);
@@ -97,10 +118,6 @@ void HW_init(HW_ctx* ctx, uint64_t flags) {
97118
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES);
98119
open_perf_fd(ctx->fd_bmiss, ctx->id_bmiss, &pe, "BMISS", -1);
99120
parent_fd = ctx->fd_bmiss;
100-
} else if (flags & HW_PROFILE_INS) {
101-
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
102-
open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1);
103-
parent_fd = ctx->fd_ins;
104121
}
105122

106123
ctx->parent_fd = parent_fd;
@@ -165,30 +182,19 @@ void HW_read(HW_ctx* ctx, sample_t* measure) {
165182
}
166183

167184
static void collect_samples(sample_t* start, sample_t* end, cputrace_anchor* anchor) {
168-
if (end->swi) {
169-
anchor->global_results.swi += end->swi - start->swi;
170-
}
171-
if (end->cyc) {
172-
anchor->global_results.cyc += end->cyc - start->cyc;
173-
}
174-
if (end->cmiss) {
175-
anchor->global_results.cmiss += end->cmiss - start->cmiss;
176-
}
177-
if (end->bmiss) {
178-
anchor->global_results.bmiss += end->bmiss - start->bmiss;
179-
}
180-
if (end->ins) {
181-
anchor->global_results.ins += end->ins - start->ins;
182-
}
185+
sample_t elapsed = *end - *start;
186+
anchor->global_results.sample(elapsed);
183187
}
184188

185-
HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags)
189+
HW_profile::HW_profile(const char* function, uint64_t index, cputrace_flags flags)
186190
: function(function), index(index), flags(flags) {
187-
if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling)
191+
pthread_mutex_lock(&g_profiler.global_lock);
192+
if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling) {
193+
pthread_mutex_unlock(&g_profiler.global_lock);
188194
return;
189-
195+
}
196+
pthread_mutex_unlock(&g_profiler.global_lock);
190197
uint64_t tid = get_thread_id();
191-
192198
cputrace_anchor& anchor = g_profiler.anchors[index];
193199
pthread_mutex_lock(&anchor.lock);
194200
anchor.name = function;
@@ -213,23 +219,44 @@ HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags)
213219
}
214220

215221
HW_profile::~HW_profile() {
216-
if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS)
217-
return;
218-
219222
cputrace_anchor& anchor = g_profiler.anchors[index];
220223
uint64_t tid = get_thread_id();
221-
224+
pthread_mutex_lock(&g_profiler.global_lock);
225+
if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS){
226+
pthread_mutex_lock(&anchor.lock);
227+
anchor.is_capturing[tid] = false;
228+
pthread_mutex_unlock(&anchor.lock);
229+
pthread_mutex_unlock(&g_profiler.global_lock);
230+
return;
231+
}
232+
pthread_mutex_unlock(&g_profiler.global_lock);
222233
pthread_mutex_lock(&anchor.lock);
223234
anchor.nest_level[tid]--;
224235
if (anchor.nest_level[tid] == 0) {
225236
HW_read(ctx, &anchor.end[tid]);
226237
collect_samples(&anchor.start[tid], &anchor.end[tid], &anchor);
227-
std::memcpy(&anchor.start[tid], &anchor.end[tid], sizeof(anchor.start[tid]));
238+
anchor.start[tid] = anchor.end[tid];
228239
anchor.is_capturing[tid] = false;
229240
}
230241
pthread_mutex_unlock(&anchor.lock);
231242
}
232243

244+
measurement_t* get_named_measurement(const std::string& name) {
245+
std::lock_guard<std::mutex> g(g_named_measurements_lock);
246+
return &g_named_measurements[name];
247+
}
248+
249+
HW_named_guard::HW_named_guard(const char* name, HW_ctx* ctx)
250+
: name(name)
251+
{
252+
measurement_t* meas = get_named_measurement(name);
253+
guard = new HW_guard(ctx, meas);
254+
}
255+
256+
HW_named_guard::~HW_named_guard() {
257+
delete guard;
258+
}
259+
233260
void cputrace_start() {
234261
pthread_mutex_lock(&g_profiler.global_lock);
235262
if (g_profiler.profiling) {
@@ -287,7 +314,7 @@ void cputrace_reset() {
287314
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
288315
if (!g_profiler.anchors[i].name) continue;
289316
pthread_mutex_lock(&g_profiler.anchors[i].lock);
290-
g_profiler.anchors[i].global_results = results{};
317+
g_profiler.anchors[i].global_results.reset();
291318
pthread_mutex_unlock(&g_profiler.anchors[i].lock);
292319
}
293320
pthread_mutex_unlock(&g_profiler.global_lock);
@@ -298,7 +325,7 @@ void cputrace_reset(ceph::Formatter* f) {
298325
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
299326
if (!g_profiler.anchors[i].name) continue;
300327
pthread_mutex_lock(&g_profiler.anchors[i].lock);
301-
g_profiler.anchors[i].global_results = results{};
328+
g_profiler.anchors[i].global_results.reset();
302329
pthread_mutex_unlock(&g_profiler.anchors[i].lock);
303330
}
304331
f->open_object_section("cputrace_reset");
@@ -320,42 +347,16 @@ void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::str
320347

321348
pthread_mutex_lock(&anchor.lock);
322349
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
323-
if (anchor.is_capturing[j]) {
350+
if (anchor.is_capturing[j] && g_profiler.profiling) {
324351
HW_read(anchor.active_contexts[j], &anchor.end[j]);
325352
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
326-
std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
353+
anchor.start[j] = anchor.end[j];
327354
}
328355
}
329356
pthread_mutex_unlock(&anchor.lock);
330357

331358
f->open_object_section(anchor.name);
332-
f->dump_unsigned("call_count", anchor.global_results.call_count);
333-
334-
if (anchor.flags & HW_PROFILE_SWI && (counter.empty() || counter == "context_switches")) {
335-
f->dump_unsigned("context_switches", anchor.global_results.swi);
336-
if (anchor.global_results.call_count)
337-
f->dump_float("avg_context_switches", (double)anchor.global_results.swi / anchor.global_results.call_count);
338-
}
339-
if (anchor.flags & HW_PROFILE_CYC && (counter.empty() || counter == "cpu_cycles")) {
340-
f->dump_unsigned("cpu_cycles", anchor.global_results.cyc);
341-
if (anchor.global_results.call_count)
342-
f->dump_float("avg_cpu_cycles", (double)anchor.global_results.cyc / anchor.global_results.call_count);
343-
}
344-
if (anchor.flags & HW_PROFILE_CMISS && (counter.empty() || counter == "cache_misses")) {
345-
f->dump_unsigned("cache_misses", anchor.global_results.cmiss);
346-
if (anchor.global_results.call_count)
347-
f->dump_float("avg_cache_misses", (double)anchor.global_results.cmiss / anchor.global_results.call_count);
348-
}
349-
if (anchor.flags & HW_PROFILE_BMISS && (counter.empty() || counter == "branch_misses")) {
350-
f->dump_unsigned("branch_misses", anchor.global_results.bmiss);
351-
if (anchor.global_results.call_count)
352-
f->dump_float("avg_branch_misses", (double)anchor.global_results.bmiss / anchor.global_results.call_count);
353-
}
354-
if (anchor.flags & HW_PROFILE_INS && (counter.empty() || counter == "instructions")) {
355-
f->dump_unsigned("instructions", anchor.global_results.ins);
356-
if (anchor.global_results.call_count)
357-
f->dump_float("avg_instructions", (double)anchor.global_results.ins / anchor.global_results.call_count);
358-
}
359+
anchor.global_results.dump(f, anchor.flags, counter);
359360
f->close_section();
360361
dumped = true;
361362
}
@@ -381,49 +382,13 @@ void cputrace_print_to_stringstream(std::stringstream& ss) {
381382
if (anchor.is_capturing[j]) {
382383
HW_read(anchor.active_contexts[j], &anchor.end[j]);
383384
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
384-
std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
385+
anchor.start[j] = anchor.end[j];
385386
}
386387
}
387388
pthread_mutex_unlock(&anchor.lock);
388389

389390
ss << " " << anchor.name << ":\n";
390-
ss << " call_count: " << anchor.global_results.call_count << "\n";
391-
392-
if (anchor.flags & HW_PROFILE_SWI) {
393-
ss << " context_switches: " << anchor.global_results.swi;
394-
if (anchor.global_results.call_count) {
395-
ss << "\n avg_context_switches: " << (double)anchor.global_results.swi / anchor.global_results.call_count;
396-
}
397-
ss << "\n";
398-
}
399-
if (anchor.flags & HW_PROFILE_CYC) {
400-
ss << " cpu_cycles: " << anchor.global_results.cyc;
401-
if (anchor.global_results.call_count) {
402-
ss << "\n avg_cpu_cycles: " << (double)anchor.global_results.cyc / anchor.global_results.call_count;
403-
}
404-
ss << "\n";
405-
}
406-
if (anchor.flags & HW_PROFILE_CMISS) {
407-
ss << " cache_misses: " << anchor.global_results.cmiss;
408-
if (anchor.global_results.call_count) {
409-
ss << "\n avg_cache_misses: " << (double)anchor.global_results.cmiss / anchor.global_results.call_count;
410-
}
411-
ss << "\n";
412-
}
413-
if (anchor.flags & HW_PROFILE_BMISS) {
414-
ss << " branch_misses: " << anchor.global_results.bmiss;
415-
if (anchor.global_results.call_count) {
416-
ss << "\n avg_branch_misses: " << (double)anchor.global_results.bmiss / anchor.global_results.call_count;
417-
}
418-
ss << "\n";
419-
}
420-
if (anchor.flags & HW_PROFILE_INS) {
421-
ss << " instructions: " << anchor.global_results.ins;
422-
if (anchor.global_results.call_count) {
423-
ss << "\n avg_instructions: " << (double)anchor.global_results.ins / anchor.global_results.call_count;
424-
}
425-
ss << "\n";
426-
}
391+
anchor.global_results.dump_to_stringstream(ss, anchor.flags);
427392
dumped = true;
428393
}
429394

0 commit comments

Comments
 (0)