Skip to content

Commit 2e9f9d4

Browse files
captain5050namhyung
authored andcommitted
perf annotation: Switch lock from a mutex to a sharded_mutex
Remove the "struct mutex lock" variable from annotation that is allocated per symbol. This removes in the region of 40 bytes per symbol allocation. Use a sharded mutex where the number of shards is set to the number of CPUs. Assuming good hashing of the annotation (done based on the pointer), this means in order to contend there needs to be more threads than CPUs, which is not currently true in any perf command. Were contention an issue it is straightforward to increase the number of shards in the mutex. On my Debian/glibc based machine, this reduces the size of struct annotation from 136 bytes to 96 bytes, or nearly 30%. Signed-off-by: Ian Rogers <[email protected]> Acked-by: Namhyung Kim <[email protected]> Cc: Andres Freund <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Yuan Can <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Adrian Hunter <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: Huacai Chen <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Masami Hiramatsu <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Kan Liang <[email protected]> Cc: Ingo Molnar <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Namhyung Kim <[email protected]>
1 parent 0650b2b commit 2e9f9d4

File tree

4 files changed

+77
-24
lines changed

4 files changed

+77
-24
lines changed

tools/perf/builtin-top.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -137,10 +137,10 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
137137
}
138138

139139
notes = symbol__annotation(sym);
140-
mutex_lock(&notes->lock);
140+
annotation__lock(notes);
141141

142142
if (!symbol__hists(sym, top->evlist->core.nr_entries)) {
143-
mutex_unlock(&notes->lock);
143+
annotation__unlock(notes);
144144
pr_err("Not enough memory for annotating '%s' symbol!\n",
145145
sym->name);
146146
sleep(1);
@@ -156,7 +156,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
156156
pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
157157
}
158158

159-
mutex_unlock(&notes->lock);
159+
annotation__unlock(notes);
160160
return err;
161161
}
162162

@@ -211,12 +211,12 @@ static void perf_top__record_precise_ip(struct perf_top *top,
211211

212212
notes = symbol__annotation(sym);
213213

214-
if (!mutex_trylock(&notes->lock))
214+
if (!annotation__trylock(notes))
215215
return;
216216

217217
err = hist_entry__inc_addr_samples(he, sample, evsel, ip);
218218

219-
mutex_unlock(&notes->lock);
219+
annotation__unlock(notes);
220220

221221
if (unlikely(err)) {
222222
/*
@@ -253,7 +253,7 @@ static void perf_top__show_details(struct perf_top *top)
253253
symbol = he->ms.sym;
254254
notes = symbol__annotation(symbol);
255255

256-
mutex_lock(&notes->lock);
256+
annotation__lock(notes);
257257

258258
symbol__calc_percent(symbol, evsel);
259259

@@ -274,7 +274,7 @@ static void perf_top__show_details(struct perf_top *top)
274274
if (more != 0)
275275
printf("%d lines not displayed, maybe increase display entries [e]\n", more);
276276
out_unlock:
277-
mutex_unlock(&notes->lock);
277+
annotation__unlock(notes);
278278
}
279279

280280
static void perf_top__resort_hists(struct perf_top *t)

tools/perf/ui/browsers/annotate.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
314314

315315
browser->entries = RB_ROOT;
316316

317-
mutex_lock(&notes->lock);
317+
annotation__lock(notes);
318318

319319
symbol__calc_percent(sym, evsel);
320320

@@ -343,7 +343,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
343343
}
344344
disasm_rb_tree__insert(browser, &pos->al);
345345
}
346-
mutex_unlock(&notes->lock);
346+
annotation__unlock(notes);
347347

348348
browser->curr_hot = rb_last(&browser->entries);
349349
}
@@ -470,10 +470,10 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
470470
}
471471

472472
notes = symbol__annotation(dl->ops.target.sym);
473-
mutex_lock(&notes->lock);
473+
annotation__lock(notes);
474474

475475
if (!symbol__hists(dl->ops.target.sym, evsel->evlist->core.nr_entries)) {
476-
mutex_unlock(&notes->lock);
476+
annotation__unlock(notes);
477477
ui__warning("Not enough memory for annotating '%s' symbol!\n",
478478
dl->ops.target.sym->name);
479479
return true;
@@ -482,7 +482,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
482482
target_ms.maps = ms->maps;
483483
target_ms.map = ms->map;
484484
target_ms.sym = dl->ops.target.sym;
485-
mutex_unlock(&notes->lock);
485+
annotation__unlock(notes);
486486
symbol__tui_annotate(&target_ms, evsel, hbt, browser->opts);
487487
sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type);
488488
ui_browser__show_title(&browser->b, title);

tools/perf/util/annotate.c

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "block-range.h"
3333
#include "string2.h"
3434
#include "util/event.h"
35+
#include "util/sharded_mutex.h"
3536
#include "arch/common.h"
3637
#include "namespaces.h"
3738
#include <regex.h>
@@ -856,15 +857,15 @@ void symbol__annotate_zero_histograms(struct symbol *sym)
856857
{
857858
struct annotation *notes = symbol__annotation(sym);
858859

859-
mutex_lock(&notes->lock);
860+
annotation__lock(notes);
860861
if (notes->src != NULL) {
861862
memset(notes->src->histograms, 0,
862863
notes->src->nr_histograms * notes->src->sizeof_sym_hist);
863864
if (notes->src->cycles_hist)
864865
memset(notes->src->cycles_hist, 0,
865866
symbol__size(sym) * sizeof(struct cyc_hist));
866867
}
867-
mutex_unlock(&notes->lock);
868+
annotation__unlock(notes);
868869
}
869870

870871
static int __symbol__account_cycles(struct cyc_hist *ch,
@@ -1121,7 +1122,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
11211122
notes->hit_insn = 0;
11221123
notes->cover_insn = 0;
11231124

1124-
mutex_lock(&notes->lock);
1125+
annotation__lock(notes);
11251126
for (offset = size - 1; offset >= 0; --offset) {
11261127
struct cyc_hist *ch;
11271128

@@ -1140,7 +1141,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size)
11401141
notes->have_cycles = true;
11411142
}
11421143
}
1143-
mutex_unlock(&notes->lock);
1144+
annotation__unlock(notes);
11441145
}
11451146

11461147
int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
@@ -1291,17 +1292,64 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
12911292
return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
12921293
}
12931294

1294-
void annotation__init(struct annotation *notes)
1295+
void annotation__exit(struct annotation *notes)
12951296
{
1296-
mutex_init(&notes->lock);
1297+
annotated_source__delete(notes->src);
12971298
}
12981299

1299-
void annotation__exit(struct annotation *notes)
1300+
static struct sharded_mutex *sharded_mutex;
1301+
1302+
static void annotation__init_sharded_mutex(void)
13001303
{
1301-
annotated_source__delete(notes->src);
1302-
mutex_destroy(&notes->lock);
1304+
/* As many mutexes as there are CPUs. */
1305+
sharded_mutex = sharded_mutex__new(cpu__max_present_cpu().cpu);
1306+
}
1307+
1308+
static size_t annotation__hash(const struct annotation *notes)
1309+
{
1310+
return (size_t)notes;
13031311
}
13041312

1313+
static struct mutex *annotation__get_mutex(const struct annotation *notes)
1314+
{
1315+
static pthread_once_t once = PTHREAD_ONCE_INIT;
1316+
1317+
pthread_once(&once, annotation__init_sharded_mutex);
1318+
if (!sharded_mutex)
1319+
return NULL;
1320+
1321+
return sharded_mutex__get_mutex(sharded_mutex, annotation__hash(notes));
1322+
}
1323+
1324+
void annotation__lock(struct annotation *notes)
1325+
NO_THREAD_SAFETY_ANALYSIS
1326+
{
1327+
struct mutex *mutex = annotation__get_mutex(notes);
1328+
1329+
if (mutex)
1330+
mutex_lock(mutex);
1331+
}
1332+
1333+
void annotation__unlock(struct annotation *notes)
1334+
NO_THREAD_SAFETY_ANALYSIS
1335+
{
1336+
struct mutex *mutex = annotation__get_mutex(notes);
1337+
1338+
if (mutex)
1339+
mutex_unlock(mutex);
1340+
}
1341+
1342+
bool annotation__trylock(struct annotation *notes)
1343+
{
1344+
struct mutex *mutex = annotation__get_mutex(notes);
1345+
1346+
if (!mutex)
1347+
return false;
1348+
1349+
return mutex_trylock(mutex);
1350+
}
1351+
1352+
13051353
static void annotation_line__add(struct annotation_line *al, struct list_head *head)
13061354
{
13071355
list_add_tail(&al->node, head);

tools/perf/util/annotate.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,7 @@ struct annotated_source {
271271
struct sym_hist *histograms;
272272
};
273273

274-
struct annotation {
275-
struct mutex lock;
274+
struct LOCKABLE annotation {
276275
u64 max_coverage;
277276
u64 start;
278277
u64 hit_cycles;
@@ -298,9 +297,15 @@ struct annotation {
298297
struct annotated_source *src;
299298
};
300299

301-
void annotation__init(struct annotation *notes);
300+
static inline void annotation__init(struct annotation *notes __maybe_unused)
301+
{
302+
}
302303
void annotation__exit(struct annotation *notes);
303304

305+
void annotation__lock(struct annotation *notes) EXCLUSIVE_LOCK_FUNCTION(*notes);
306+
void annotation__unlock(struct annotation *notes) UNLOCK_FUNCTION(*notes);
307+
bool annotation__trylock(struct annotation *notes) EXCLUSIVE_TRYLOCK_FUNCTION(true, *notes);
308+
304309
static inline int annotation__cycles_width(struct annotation *notes)
305310
{
306311
if (notes->have_cycles && notes->options->show_minmax_cycle)

0 commit comments

Comments
 (0)