Skip to content

Commit 8b8ff8c

Browse files
ahunter6 authored and Peter Zijlstra committed
perf/x86: Add new event for AUX output counter index
PEBS-via-PT records contain a mask of applicable counters. To identify which event belongs to which counter, a side-band event is needed. Until now, there has been no side-band event, and consequently users were limited to using a single event. Add such a side-band event. Note the event is optimised to output only when the counter index changes for an event. That works only so long as all PEBS-via-PT events are scheduled together, which they are for a recording session because they are in a single group. Also no attribute bit is used to select the new event, so a new kernel is not compatible with older perf tools. The assumption being that PEBS-via-PT is sufficiently esoteric that users will not be troubled by this. Signed-off-by: Adrian Hunter <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 4110083 commit 8b8ff8c

File tree

6 files changed

+69
-0
lines changed

6 files changed

+69
-0
lines changed

arch/x86/events/core.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all);
6666
DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable);
6767
DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable);
6868

69+
DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
70+
6971
DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add);
7072
DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del);
7173
DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
12151217
hwc->last_cpu = smp_processor_id();
12161218
hwc->last_tag = ++cpuc->tags[i];
12171219

1220+
static_call_cond(x86_pmu_assign)(event, idx);
1221+
12181222
switch (hwc->idx) {
12191223
case INTEL_PMC_IDX_FIXED_BTS:
12201224
case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
20052009
static_call_update(x86_pmu_enable, x86_pmu.enable);
20062010
static_call_update(x86_pmu_disable, x86_pmu.disable);
20072011

2012+
static_call_update(x86_pmu_assign, x86_pmu.assign);
2013+
20082014
static_call_update(x86_pmu_add, x86_pmu.add);
20092015
static_call_update(x86_pmu_del, x86_pmu.del);
20102016
static_call_update(x86_pmu_read, x86_pmu.read);

arch/x86/events/intel/core.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2402,6 +2402,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
24022402
intel_pmu_pebs_disable(event);
24032403
}
24042404

2405+
/*
 * Intel implementation of the x86_pmu.assign callback (installed by
 * intel_aux_output_init()); x86_assign_hw_event() invokes it through
 * static_call_cond(x86_pmu_assign)(event, idx).
 *
 * For an event that satisfies is_pebs_pt(), pass the counter index to
 * perf_report_aux_output_id(), which emits it as the hw_id of a
 * PERF_RECORD_AUX_OUTPUT_HW_ID record. Other events are ignored.
 */
static void intel_pmu_assign_event(struct perf_event *event, int idx)
2406+
{
2407+
if (is_pebs_pt(event))
2408+
perf_report_aux_output_id(event, idx);
2409+
}
2410+
24052411
static void intel_pmu_del_event(struct perf_event *event)
24062412
{
24072413
if (needs_branch_stack(event))
@@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
44944500
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
44954501
}
44964502

4503+
/*
 * Hook up intel_pmu_assign_event() as x86_pmu.assign, but only when the
 * pebs_output_pt_available capability bit is set. Without that capability
 * x86_pmu.assign is left untouched. Called from intel_pmu_init() just
 * before it returns 0.
 */
static void intel_aux_output_init(void)
4504+
{
4505+
/* See also intel_pmu_aux_output_match(), which tests the same capability */
4506+
if (x86_pmu.intel_cap.pebs_output_pt_available)
4507+
x86_pmu.assign = intel_pmu_assign_event;
4508+
}
4509+
44974510
static int intel_pmu_aux_output_match(struct perf_event *event)
44984511
{
4512+
/* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
44994513
if (!x86_pmu.intel_cap.pebs_output_pt_available)
45004514
return 0;
45014515

@@ -6301,6 +6315,8 @@ __init int intel_pmu_init(void)
63016315
if (is_hybrid())
63026316
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
63036317

6318+
intel_aux_output_init();
6319+
63046320
return 0;
63056321
}
63066322

arch/x86/events/perf_event.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,7 @@ struct x86_pmu {
726726
void (*enable_all)(int added);
727727
void (*enable)(struct perf_event *);
728728
void (*disable)(struct perf_event *);
729+
void (*assign)(struct perf_event *event, int idx);
729730
void (*add)(struct perf_event *);
730731
void (*del)(struct perf_event *);
731732
void (*read)(struct perf_event *event);

include/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,6 +1397,7 @@ perf_event_addr_filters(struct perf_event *event)
13971397
}
13981398

13991399
extern void perf_event_addr_filters_sync(struct perf_event *event);
1400+
extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
14001401

14011402
extern int perf_output_begin(struct perf_output_handle *handle,
14021403
struct perf_sample_data *data,

include/uapi/linux/perf_event.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,21 @@ enum perf_event_type {
11411141
*/
11421142
PERF_RECORD_TEXT_POKE = 20,
11431143

1144+
/*
1145+
* Data written to the AUX area by hardware due to aux_output, may need
1146+
* to be matched to the event by an architecture-specific hardware ID.
1147+
* This records the hardware ID, but requires sample_id to provide the
1148+
* event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
1149+
* records from multiple events.
1150+
*
1151+
* struct {
1152+
* struct perf_event_header header;
1153+
* u64 hw_id;
1154+
* struct sample_id sample_id;
1155+
* };
1156+
*/
1157+
PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
1158+
11441159
PERF_RECORD_MAX, /* non-ABI */
11451160
};
11461161

kernel/events/core.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9062,6 +9062,36 @@ static void perf_log_itrace_start(struct perf_event *event)
90629062
perf_output_end(&handle);
90639063
}
90649064

9065+
/*
 * Emit a PERF_RECORD_AUX_OUTPUT_HW_ID side-band record for @event.
 *
 * @event: event the hardware ID belongs to; if it has a parent, the record
 *         is generated against the parent instead.
 * @hw_id: architecture-specific hardware ID to store in the record (the
 *         Intel caller passes the counter index).
 *
 * The record consists of the header and hw_id written from 'rec', followed
 * by whatever perf_event__output_id_sample() appends. Nothing is written if
 * perf_output_begin() returns non-zero.
 */
void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
9066+
{
9067+
struct perf_output_handle handle;
9068+
struct perf_sample_data sample;
9069+
/* Fixed part of the record: header immediately followed by hw_id */
struct perf_aux_event {
9070+
struct perf_event_header header;
9071+
u64 hw_id;
9072+
} rec;
9073+
int ret;
9074+
9075+
/* NOTE(review): presumably redirects inherited (child) events to the original event - confirm */
if (event->parent)
9076+
event = event->parent;
9077+
9078+
rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID;
9079+
rec.header.misc = 0;
9080+
/* Size of the fixed part only at this point */
rec.header.size = sizeof(rec);
9081+
rec.hw_id = hw_id;
9082+
9083+
/*
 * NOTE(review): perf_event_header__init_id() is given the header and the
 * sample data before the size is used below; presumably it accounts for
 * the trailing sample_id in header.size - confirm against its definition.
 */
perf_event_header__init_id(&rec.header, &sample, event);
9084+
ret = perf_output_begin(&handle, &sample, event, rec.header.size);
9085+
9086+
/* Could not start the output: drop the record silently */
if (ret)
9087+
return;
9088+
9089+
perf_output_put(&handle, rec);
9090+
perf_event__output_id_sample(event, &handle, &sample);
9091+
9092+
perf_output_end(&handle);
9093+
}
9094+
90659095
static int
90669096
__perf_event_account_interrupt(struct perf_event *event, int throttle)
90679097
{

0 commit comments

Comments
 (0)