Commit 89e97eb

Kan Liang authored and Peter Zijlstra committed
perf/x86/intel/ds: Fix the conversion from TSC to perf time
The time order is incorrect when the TSC in a PEBS record is used.

  $ perf record -e cycles:upp dd if=/dev/zero of=/dev/null count=10000
  $ perf script --show-task-events
      perf-exec     0     0.000000: PERF_RECORD_COMM: perf-exec:915/915
             dd   915   106.479872: PERF_RECORD_COMM exec: dd:915/915
             dd   915   106.483270: PERF_RECORD_EXIT(915:915):(914:914)
             dd   915   106.512429:          1 cycles:upp:  ffffffff96c011b7 [unknown] ([unknown])
             ...

The cycles sample is stamped 106.512429, after the PERF_RECORD_EXIT of
the dd task at 106.483270, even though the sample must have been taken
while dd was still running.

The perf time is from sched_clock_cpu(). The current PEBS code
unconditionally converts the TSC to native_sched_clock(). There is a
shift between the two clocks. If the TSC is stable, the shift is a
constant, __sched_clock_offset. If the TSC is unstable, the shift has
to be calculated at runtime.

This patch doesn't support the conversion when the TSC is unstable.
The TSC-unstable case is a corner case and very unlikely to happen. If
it does happen, the TSC in a PEBS record is dropped and perf falls
back to perf_event_clock().

Fixes: 47a3aeb ("perf/x86/intel/pebs: Fix PEBS timestamps overwritten")
Reported-by: Namhyung Kim <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/all/CAM9d7cgWDVAq8-11RbJ2uGfwkKD6fA-OMwOKDrNUrU_=8MgEjg@mail.gmail.com/
1 parent 5d515ee commit 89e97eb
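A minimal sketch (kernel C, not part of the patch) of the clock
relationship the commit message describes: with a stable TSC and the
native sched clock in use, the perf clock sched_clock_cpu() equals
native_sched_clock() + __sched_clock_offset, so a PEBS TSC value scaled
by native_sched_clock_from_tsc() still needs the same offset added. The
helper name pebs_perf_time below is illustrative only.

  /*
   * Illustrative sketch, not in the patch: with a stable TSC the perf
   * clock is native_sched_clock() + __sched_clock_offset, so the raw
   * PEBS TSC must receive the same offset after being scaled by
   * native_sched_clock_from_tsc().
   */
  static u64 pebs_perf_time(u64 tsc)
  {
  	return native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
  }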

File tree

  • arch/x86/events/intel — 1 file changed, +26, -9 lines

arch/x86/events/intel/ds.c

Lines changed: 26 additions & 9 deletions
@@ -2,12 +2,14 @@
 #include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/slab.h>
+#include <linux/sched/clock.h>
 
 #include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
 #include <asm/tlbflush.h>
 #include <asm/insn.h>
 #include <asm/io.h>
+#include <asm/timer.h>
 
 #include "../perf_event.h"
 
@@ -1568,6 +1570,27 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
 	return val;
 }
 
+static void setup_pebs_time(struct perf_event *event,
+			    struct perf_sample_data *data,
+			    u64 tsc)
+{
+	/* Converting to a user-defined clock is not supported yet. */
+	if (event->attr.use_clockid != 0)
+		return;
+
+	/*
+	 * Doesn't support the conversion when the TSC is unstable.
+	 * The TSC unstable case is a corner case and very unlikely to
+	 * happen. If it happens, the TSC in a PEBS record will be
+	 * dropped and fall back to perf_event_clock().
+	 */
+	if (!using_native_sched_clock() || !sched_clock_stable())
+		return;
+
+	data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
+	data->sample_flags |= PERF_SAMPLE_TIME;
+}
+
 #define PERF_SAMPLE_ADDR_TYPE	(PERF_SAMPLE_ADDR |		\
				 PERF_SAMPLE_PHYS_ADDR |	\
				 PERF_SAMPLE_DATA_PAGE_SIZE)
@@ -1715,11 +1738,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
	 *
	 * We can only do this for the default trace clock.
	 */
-	if (x86_pmu.intel_cap.pebs_format >= 3 &&
-	    event->attr.use_clockid == 0) {
-		data->time = native_sched_clock_from_tsc(pebs->tsc);
-		data->sample_flags |= PERF_SAMPLE_TIME;
-	}
+	if (x86_pmu.intel_cap.pebs_format >= 3)
+		setup_pebs_time(event, data, pebs->tsc);
 
	if (has_branch_stack(event))
		perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
@@ -1781,10 +1801,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
	perf_sample_data_init(data, 0, event->hw.last_period);
	data->period = event->hw.last_period;
 
-	if (event->attr.use_clockid == 0) {
-		data->time = native_sched_clock_from_tsc(basic->tsc);
-		data->sample_flags |= PERF_SAMPLE_TIME;
-	}
+	setup_pebs_time(event, data, basic->tsc);
 
	/*
	 * We must however always use iregs for the unwinder to stay sane; the
