Skip to content

Commit 8cacac6

Browse files
author
Ingo Molnar
committed
Merge tag 'perf-core-for-mingo-5.5-20191122' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: perf report: Jin Yao: - Allow entering the annotation view (symbol source/assembly + overhead/cycles/etc column) from the 'perf report --total-cycles' interface. E.g.: # perf record --all-cpus --branch-any --all-kernel ^C[ perf record: Woken up 5 times to write data ] # # perf evlist -v cycles: size: 120, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|BRANCH_STACK, read_format: ID, disabled: 1, inherit: 1, exclude_user: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1, branch_sample_type: ANY # # perf report --total-cycles # # Samples: 78762 of event 'cycles' Sampled Sampled Avg Avg Cycles% Cycles Cycles% Cycles [Program Block Range] Shared Object 1.72% 95.8K 0.00% 254 [msr.h:105 -> msr.h:166] [kernel.vmlinux] 1.56% 107.6K 0.00% 618 [compiler.h:199 -> common.c:301] [kernel.vmlinux] 0.83% 46.3K 0.00% 409 [entry_64.S:153 -> entry_64.S:175] [kernel.vmlinux] 0.83% 46.1K 0.00% 83 [jump_label.h:41 -> tsc.c:230] [kernel.vmlinux] 0.64% 36.9K 0.01% 1.4K [hda_intel.c:904 -> hda_intel.c:916] [snd_hda_intel] 0.57% 30.2K 0.00% 282 [file.c:710 -> file.c:730] [kernel.vmlinux] 0.48% 25.8K 0.00% 82 [spinlock.c:158 -> spinlock.c:160] [kernel.vmlinux] 0.45% 23.7K 0.00% 369 [tick-broadcast.c:585 -> tick-broadcast.c:586] [kernel.vmlinux] 0.44% 24.4K 0.00% 73 [msr.h:236 -> tsc.c:1088] [kernel.vmlinux] 0.43% 22.7K 0.00% 144 [cpuidle.c:229 -> cpuidle.c:232] [kernel.vmlinux] Then press 'A' or Enter on one of those lines, just like with 'perf top', say the top one: [msr.h:105 -> msr.h:166], then this shows up: Samples: 78K of event 'cycles', 4000 Hz, Event count (approx.): 78762 native_write_msr /lib/modules/5.4.0-rc8/build/vmlinux [Percent: local period] Percent│ IPC Cycle (Average IPC: 0.02, IPC Coverage: 50.0%) │ │ Disassembly of section .text: │ │ ffffffff8106c480 <native_write_msr>: │ __wrmsr(): │ 
return EAX_EDX_VAL(val, low, high); │ } │ │ static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) │ { │ asm volatile("1: wrmsr\n" 49.16 │0.02 mov %edi,%ecx │0.02 mov %esi,%eax │0.02 wrmsr │ arch_static_branch(): │ #include <linux/stringify.h> │ #include <linux/types.h> │ │ static __always_inline bool arch_static_branch(struct static_key *key, bool branch) │ { │ asm_volatile_goto("1:" 0.79 │0.02 nop │ native_write_msr(): │ { │ __wrmsr(msr, low, high); │ │ if (msr_tracepoint_active(__tracepoint_write_msr)) │ do_trace_write_msr(msr, ((u64)high << 32 | low), 0); │ } 50.05 │0.02 254 ← retq │ do_trace_write_msr(msr, ((u64)high << 32 | low), 0); │ shl $0x20,%rdx │ mov %esi,%esi │ or %rdx,%rsi │ xor %edx,%edx │ → jmpq do_trace_write_msr We need to improve this to show the source code line numbers in the annotation view, so one can go from that program block to the annotation view and see those source code line numbers straight away. auxtrace/Intel PT: Adrian Hunter: - Add support for AUX area sampling, requires new functionality that will land in 5.5, it's already in tip. This includes kernel capability querying so that it fails gracefully with older kernels, dumping aux area samples in 'perf report -D' and 'perf script'. perf.data: Alexey Budankov: - Fix decompression of PERF_RECORD_COMPRESSED records. core: Arnaldo Carvalho de Melo: - Use the 'dcacheline' cmp routine to find the right DSOs taking into account the 'maj', 'min', 'ino' and 'ino_generation', that got moved from 'struct map' to 'struct dso', where it belongs. This further reduces the size of 'struct map', there is still more work to do to maybe get it to max one cacheline. libtraceevent: Hewenliang: - Fix memory leakage in copy_filter_type(). Sudip Mukherjee: - Fix header installation. perf parse: Ian Rogers: - Fix potential memory leak when handling tracepoint errors, found using LLVM's libFuzzer. perf probe: Colin Ian King: - Fix spelling mistake "addrees" -> "address". 
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
2 parents 8f6ee51 + 4584f08 commit 8cacac6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1190
-200
lines changed

tools/include/uapi/linux/perf_event.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,9 @@ enum perf_event_sample_format {
141141
PERF_SAMPLE_TRANSACTION = 1U << 17,
142142
PERF_SAMPLE_REGS_INTR = 1U << 18,
143143
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
144+
PERF_SAMPLE_AUX = 1U << 20,
144145

145-
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
146+
PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
146147

147148
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
148149
};
@@ -300,6 +301,7 @@ enum perf_event_read_format {
300301
/* add: sample_stack_user */
301302
#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
302303
#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
304+
#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */
303305

304306
/*
305307
* Hardware event_id to monitor via a performance monitoring event:
@@ -424,7 +426,9 @@ struct perf_event_attr {
424426
*/
425427
__u32 aux_watermark;
426428
__u16 sample_max_stack;
427-
__u16 __reserved_2; /* align to __u64 */
429+
__u16 __reserved_2;
430+
__u32 aux_sample_size;
431+
__u32 __reserved_3;
428432
};
429433

430434
/*
@@ -864,6 +868,8 @@ enum perf_event_type {
864868
* { u64 abi; # enum perf_sample_regs_abi
865869
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
866870
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
871+
* { u64 size;
872+
* char data[size]; } && PERF_SAMPLE_AUX
867873
* };
868874
*/
869875
PERF_RECORD_SAMPLE = 9,

tools/lib/traceevent/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -232,10 +232,10 @@ install_pkgconfig:
232232

233233
install_headers:
234234
$(call QUIET_INSTALL, headers) \
235-
$(call do_install,event-parse.h,$(DESTDIR)$(includedir_SQ),644); \
236-
$(call do_install,event-utils.h,$(DESTDIR)$(includedir_SQ),644); \
237-
$(call do_install,trace-seq.h,$(DESTDIR)$(includedir_SQ),644); \
238-
$(call do_install,kbuffer.h,$(DESTDIR)$(includedir_SQ),644)
235+
$(call do_install,event-parse.h,$(includedir_SQ),644); \
236+
$(call do_install,event-utils.h,$(includedir_SQ),644); \
237+
$(call do_install,trace-seq.h,$(includedir_SQ),644); \
238+
$(call do_install,kbuffer.h,$(includedir_SQ),644)
239239

240240
install: install_lib
241241

tools/lib/traceevent/parse-filter.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1473,8 +1473,10 @@ static int copy_filter_type(struct tep_event_filter *filter,
14731473
if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
14741474
/* Add trivial event */
14751475
arg = allocate_arg();
1476-
if (arg == NULL)
1476+
if (arg == NULL) {
1477+
free(str);
14771478
return -1;
1479+
}
14781480

14791481
arg->type = TEP_FILTER_ARG_BOOLEAN;
14801482
if (strcmp(str, "TRUE") == 0)
@@ -1483,8 +1485,11 @@ static int copy_filter_type(struct tep_event_filter *filter,
14831485
arg->boolean.value = 0;
14841486

14851487
filter_type = add_filter_type(filter, event->id);
1486-
if (filter_type == NULL)
1488+
if (filter_type == NULL) {
1489+
free(str);
1490+
free_arg(arg);
14871491
return -1;
1492+
}
14881493

14891494
filter_type->filter = arg;
14901495

tools/perf/Documentation/intel-pt.txt

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,56 @@ pwr_evt Enable power events. The power events provide information about
434434
"0" otherwise.
435435

436436

437+
AUX area sampling option
438+
------------------------
439+
440+
To select Intel PT "sampling" the AUX area sampling option can be used:
441+
442+
--aux-sample
443+
444+
Optionally it can be followed by the sample size in bytes e.g.
445+
446+
--aux-sample=8192
447+
448+
In addition, the Intel PT event to sample must be defined e.g.
449+
450+
-e intel_pt//u
451+
452+
Samples on other events will be created containing Intel PT data e.g. the
453+
following will create Intel PT samples on the branch-misses event, note the
454+
events must be grouped using {}:
455+
456+
perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
457+
458+
An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
459+
events. In this case, the grouping is implied e.g.
460+
461+
perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
462+
463+
is the same as:
464+
465+
perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
466+
467+
but allows for also using an address filter e.g.:
468+
469+
perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
470+
471+
It is important to select a sample size that is big enough to contain at least
472+
one PSB packet. If not, a warning will be displayed:
473+
474+
Intel PT sample size (%zu) may be too small for PSB period (%zu)
475+
476+
The calculation used for that is: if sample_size <= psb_period + 256 display the
477+
warning. When sampling is used, psb_period defaults to 0 (2KiB).
478+
479+
The default sample size is 4KiB.
480+
481+
The sample size is passed in aux_sample_size in struct perf_event_attr. The
482+
sample size is limited by the maximum event size which is 64KiB. It is
483+
difficult to know how big the event might be without the trace sample attached,
484+
but the tool validates that the sample size is not greater than 60KiB.
485+
486+
437487
new snapshot option
438488
-------------------
439489

@@ -487,8 +537,8 @@ their mlock limit (which defaults to 64KiB but is not multiplied by the number
487537
of cpus).
488538

489539
In full-trace mode, powers of two are allowed for buffer size, with a minimum
490-
size of 2 pages. In snapshot mode, it is the same but the minimum size is
491-
1 page.
540+
size of 2 pages. In snapshot mode or sampling mode, it is the same but the
541+
minimum size is 1 page.
492542

493543
The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
494544

@@ -501,12 +551,17 @@ Intel PT modes of operation
501551

502552
Intel PT can be used in 3 modes:
503553
full-trace mode
554+
sample mode
504555
snapshot mode
505556

506557
Full-trace mode traces continuously e.g.
507558

508559
perf record -e intel_pt//u uname
509560

561+
Sample mode attaches an Intel PT sample to other events e.g.
562+
563+
perf record --aux-sample -e intel_pt//u -e branch-misses:u
564+
510565
Snapshot mode captures the available data when a signal is sent e.g.
511566

512567
perf record -v -e intel_pt//u -S ./loopy 1000000000 &

tools/perf/Documentation/perf-record.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ OPTIONS
6262
like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
6363
- 'aux-output': Generate AUX records instead of events. This requires
6464
that an AUX area event is also provided.
65+
- 'aux-sample-size': Set sample size for AUX area sampling. If the
66+
'--aux-sample' option has been used, set aux-sample-size=0 to disable
67+
AUX area sampling for the event.
6568

6669
See the linkperf:perf-list[1] man page for more parameters.
6770

@@ -433,6 +436,12 @@ can be specified in a string that follows this option:
433436
In Snapshot Mode trace data is captured only when signal SIGUSR2 is received
434437
and on exit if the above 'e' option is given.
435438

439+
--aux-sample[=OPTIONS]::
440+
Select AUX area sampling. At least one of the events selected by the -e option
441+
must be an AUX area event. Samples on other events will be created containing
442+
data from the AUX area. Optionally sample size may be specified, otherwise it
443+
defaults to 4KiB.
444+
436445
--proc-map-timeout::
437446
When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
438447
because the file may be huge. A time out is needed in such cases.

tools/perf/arch/x86/util/auxtrace.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,11 @@ struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist,
2626
bool found_bts = false;
2727

2828
intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
29+
if (intel_pt_pmu)
30+
intel_pt_pmu->auxtrace = true;
2931
intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
32+
if (intel_bts_pmu)
33+
intel_bts_pmu->auxtrace = true;
3034

3135
evlist__for_each_entry(evlist, evsel) {
3236
if (intel_pt_pmu && evsel->core.attr.type == intel_pt_pmu->type)

tools/perf/arch/x86/util/intel-bts.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
113113
const struct perf_cpu_map *cpus = evlist->core.cpus;
114114
bool privileged = perf_event_paranoid_check(-1);
115115

116+
if (opts->auxtrace_sample_mode) {
117+
pr_err("Intel BTS does not support AUX area sampling\n");
118+
return -EINVAL;
119+
}
120+
116121
btsr->evlist = evlist;
117122
btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
118123

tools/perf/arch/x86/util/intel-pt.c

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "../../util/event.h"
1818
#include "../../util/evlist.h"
1919
#include "../../util/evsel.h"
20+
#include "../../util/evsel_config.h"
2021
#include "../../util/cpumap.h"
2122
#include "../../util/mmap.h"
2223
#include <subcmd/parse-options.h>
@@ -551,6 +552,43 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
551552
evsel->core.attr.config);
552553
}
553554

555+
static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
556+
struct evsel *evsel)
557+
{
558+
struct perf_evsel_config_term *term;
559+
u64 user_bits = 0, bits;
560+
561+
term = perf_evsel__get_config_term(evsel, CFG_CHG);
562+
if (term)
563+
user_bits = term->val.cfg_chg;
564+
565+
bits = perf_pmu__format_bits(&intel_pt_pmu->format, "psb_period");
566+
567+
/* Did user change psb_period */
568+
if (bits & user_bits)
569+
return;
570+
571+
/* Set psb_period to 0 */
572+
evsel->core.attr.config &= ~bits;
573+
}
574+
575+
static void intel_pt_min_max_sample_sz(struct evlist *evlist,
576+
size_t *min_sz, size_t *max_sz)
577+
{
578+
struct evsel *evsel;
579+
580+
evlist__for_each_entry(evlist, evsel) {
581+
size_t sz = evsel->core.attr.aux_sample_size;
582+
583+
if (!sz)
584+
continue;
585+
if (min_sz && (sz < *min_sz || !*min_sz))
586+
*min_sz = sz;
587+
if (max_sz && sz > *max_sz)
588+
*max_sz = sz;
589+
}
590+
}
591+
554592
/*
555593
* Currently, there is not enough information to disambiguate different PEBS
556594
* events, so only allow one.
@@ -606,6 +644,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
606644
return -EINVAL;
607645
}
608646

647+
if (opts->auxtrace_snapshot_mode && opts->auxtrace_sample_mode) {
648+
pr_err("Snapshot mode (" INTEL_PT_PMU_NAME " PMU) and sample trace cannot be used together\n");
649+
return -EINVAL;
650+
}
651+
609652
if (opts->use_clockid) {
610653
pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
611654
return -EINVAL;
@@ -617,6 +660,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
617660
if (!opts->full_auxtrace)
618661
return 0;
619662

663+
if (opts->auxtrace_sample_mode)
664+
intel_pt_config_sample_mode(intel_pt_pmu, intel_pt_evsel);
665+
620666
err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
621667
if (err)
622668
return err;
@@ -666,6 +712,34 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
666712
opts->auxtrace_snapshot_size, psb_period);
667713
}
668714

715+
/* Set default sizes for sample mode */
716+
if (opts->auxtrace_sample_mode) {
717+
size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
718+
size_t min_sz = 0, max_sz = 0;
719+
720+
intel_pt_min_max_sample_sz(evlist, &min_sz, &max_sz);
721+
if (!opts->auxtrace_mmap_pages && !privileged &&
722+
opts->mmap_pages == UINT_MAX)
723+
opts->mmap_pages = KiB(256) / page_size;
724+
if (!opts->auxtrace_mmap_pages) {
725+
size_t sz = round_up(max_sz, page_size) / page_size;
726+
727+
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
728+
}
729+
if (max_sz > opts->auxtrace_mmap_pages * (size_t)page_size) {
730+
pr_err("Sample size %zu must not be greater than AUX area tracing mmap size %zu\n",
731+
max_sz,
732+
opts->auxtrace_mmap_pages * (size_t)page_size);
733+
return -EINVAL;
734+
}
735+
pr_debug2("Intel PT min. sample size: %zu max. sample size: %zu\n",
736+
min_sz, max_sz);
737+
if (psb_period &&
738+
min_sz <= psb_period + INTEL_PT_PSB_PERIOD_NEAR)
739+
ui__warning("Intel PT sample size (%zu) may be too small for PSB period (%zu)\n",
740+
min_sz, psb_period);
741+
}
742+
669743
/* Set default sizes for full trace mode */
670744
if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
671745
if (privileged) {
@@ -682,7 +756,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
682756
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
683757
size_t min_sz;
684758

685-
if (opts->auxtrace_snapshot_mode)
759+
if (opts->auxtrace_snapshot_mode || opts->auxtrace_sample_mode)
686760
min_sz = KiB(4);
687761
else
688762
min_sz = KiB(8);
@@ -1136,5 +1210,10 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
11361210
ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
11371211
ptr->itr.reference = intel_pt_reference;
11381212
ptr->itr.read_finish = intel_pt_read_finish;
1213+
/*
1214+
* Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K
1215+
* should give at least 1 PSB per sample.
1216+
*/
1217+
ptr->itr.default_aux_sample_size = 4096;
11391218
return &ptr->itr;
11401219
}

tools/perf/builtin-inject.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct perf_inject {
4545
u64 aux_id;
4646
struct list_head samples;
4747
struct itrace_synth_opts itrace_synth_opts;
48+
char event_copy[PERF_SAMPLE_MAX_SIZE];
4849
};
4950

5051
struct event_entry {
@@ -214,6 +215,28 @@ static int perf_event__drop_aux(struct perf_tool *tool,
214215
return 0;
215216
}
216217

218+
static union perf_event *
219+
perf_inject__cut_auxtrace_sample(struct perf_inject *inject,
220+
union perf_event *event,
221+
struct perf_sample *sample)
222+
{
223+
size_t sz1 = sample->aux_sample.data - (void *)event;
224+
size_t sz2 = event->header.size - sample->aux_sample.size - sz1;
225+
union perf_event *ev = (union perf_event *)inject->event_copy;
226+
227+
if (sz1 > event->header.size || sz2 > event->header.size ||
228+
sz1 + sz2 > event->header.size ||
229+
sz1 < sizeof(struct perf_event_header) + sizeof(u64))
230+
return event;
231+
232+
memcpy(ev, event, sz1);
233+
memcpy((void *)ev + sz1, (void *)event + event->header.size - sz2, sz2);
234+
ev->header.size = sz1 + sz2;
235+
((u64 *)((void *)ev + sz1))[-1] = 0;
236+
237+
return ev;
238+
}
239+
217240
typedef int (*inject_handler)(struct perf_tool *tool,
218241
union perf_event *event,
219242
struct perf_sample *sample,
@@ -226,13 +249,19 @@ static int perf_event__repipe_sample(struct perf_tool *tool,
226249
struct evsel *evsel,
227250
struct machine *machine)
228251
{
252+
struct perf_inject *inject = container_of(tool, struct perf_inject,
253+
tool);
254+
229255
if (evsel && evsel->handler) {
230256
inject_handler f = evsel->handler;
231257
return f(tool, event, sample, evsel, machine);
232258
}
233259

234260
build_id__mark_dso_hit(tool, event, sample, evsel, machine);
235261

262+
if (inject->itrace_synth_opts.set && sample->aux_sample.size)
263+
event = perf_inject__cut_auxtrace_sample(inject, event, sample);
264+
236265
return perf_event__repipe_synth(tool, event);
237266
}
238267

0 commit comments

Comments
 (0)