Skip to content

Commit 7c1b16b

Browse files
ahunter6 authored and acmel committed
perf intel-pt: Add support for decoding FUP/TIP only
Use the new itrace 'q' option to add support for a mode of decoding that
ignores TNT, does not walk object code, but gets the ip from FUP and TIP
packets.

Example:

  $ perf record -e intel_pt//u grep -rI pudding drivers
  [ perf record: Woken up 52 times to write data ]
  [ perf record: Captured and wrote 57.870 MB perf.data ]
  $ time perf script --itrace=bi | wc -l
  58948289

  real    1m23.863s
  user    1m23.251s
  sys     0m7.452s
  $ time perf script --itrace=biq | wc -l
  3385694

  real    0m4.453s
  user    0m4.455s
  sys     0m0.328s

Signed-off-by: Adrian Hunter <[email protected]>
Reviewed-by: Andi Kleen <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jiri Olsa <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 5197153 commit 7c1b16b

File tree

4 files changed

+200
-5
lines changed

4 files changed

+200
-5
lines changed

tools/perf/Documentation/perf-intel-pt.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,7 @@ The letters are:
825825
l synthesize last branch entries (use with i or x)
826826
L synthesize last branch entries on existing event records
827827
s skip initial number of events
828+
q quicker (less detailed) decoding
828829

829830
"Instructions" events look like they were recorded by "perf record -e
830831
instructions".
@@ -969,6 +970,36 @@ at the beginning. This is useful to ignore initialization code.
969970

970971
skips the first million instructions.
971972

973+
The q option changes the way the trace is decoded. The decoding is much faster
974+
but much less detailed. Specifically, with the q option, the decoder does not
975+
decode TNT packets, and does not walk object code, but gets the ip from FUP and
976+
TIP packets. The q option can be used with the b and i options but the period
977+
is not used. The q option decodes more quickly, but is useful only if the
978+
control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or
979+
TIP.PGD packets (see below). However the q option could be used to find time
980+
ranges that could then be decoded fully using the --time option.
981+
982+
What will *not* be decoded with the (single) q option:
983+
984+
- direct calls and jmps
985+
- conditional branches
986+
- non-branch instructions
987+
988+
What *will* be decoded with the (single) q option:
989+
990+
- asynchronous branches such as interrupts
991+
- indirect branches
992+
- function return target address *if* the noretcomp config term (refer
993+
to the config terms section) was used
994+
- start of (control-flow) tracing
995+
- end of (control-flow) tracing, if it is not out of context
996+
- power events, ptwrite, transaction start and abort
997+
- instruction pointer associated with PSB packets
998+
999+
Note the q option does not specify what events will be synthesized e.g. the p
1000+
option must be used also to show power events.
1001+
1002+
9721003
dump option
9731004
~~~~~~~~~~~
9741005

tools/perf/util/intel-pt-decoder/intel-pt-decoder.c

Lines changed: 163 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ enum intel_pt_pkt_state {
5555
INTEL_PT_STATE_TIP_PGD,
5656
INTEL_PT_STATE_FUP,
5757
INTEL_PT_STATE_FUP_NO_TIP,
58+
INTEL_PT_STATE_RESAMPLE,
5859
};
5960

6061
static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
@@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
6566
case INTEL_PT_STATE_ERR_RESYNC:
6667
case INTEL_PT_STATE_IN_SYNC:
6768
case INTEL_PT_STATE_TNT_CONT:
69+
case INTEL_PT_STATE_RESAMPLE:
6870
return true;
6971
case INTEL_PT_STATE_TNT:
7072
case INTEL_PT_STATE_TIP:
@@ -109,6 +111,8 @@ struct intel_pt_decoder {
109111
bool fixup_last_mtc;
110112
bool have_last_ip;
111113
bool in_psb;
114+
bool hop;
115+
bool hop_psb_fup;
112116
enum intel_pt_param_flags flags;
113117
uint64_t pos;
114118
uint64_t last_ip;
@@ -235,6 +239,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
235239
decoder->data = params->data;
236240
decoder->return_compression = params->return_compression;
237241
decoder->branch_enable = params->branch_enable;
242+
decoder->hop = params->quick >= 1;
238243

239244
decoder->flags = params->flags;
240245

@@ -275,6 +280,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
275280
intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
276281
intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
277282

283+
if (decoder->hop)
284+
intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
285+
278286
return decoder;
279287
}
280288

@@ -1730,8 +1738,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
17301738

17311739
case INTEL_PT_FUP:
17321740
decoder->pge = true;
1733-
if (decoder->packet.count)
1741+
if (decoder->packet.count) {
17341742
intel_pt_set_last_ip(decoder);
1743+
if (decoder->hop) {
1744+
/* Act on FUP at PSBEND */
1745+
decoder->ip = decoder->last_ip;
1746+
decoder->hop_psb_fup = true;
1747+
}
1748+
}
17351749
break;
17361750

17371751
case INTEL_PT_MODE_TSX:
@@ -1875,6 +1889,118 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
18751889
}
18761890
}
18771891

1892+
/*
 * Handler for INTEL_PT_STATE_RESAMPLE (hop mode only).
 *
 * Emits one extra "instruction" sample at the decoder's current ip and
 * switches the packet state back to INTEL_PT_STATE_IN_SYNC so that normal
 * packet walking resumes on the next call. It is used after a sample that
 * only carried a to_ip, so the same address is also reported as an
 * instruction sample.
 *
 * Always returns 0.
 */
static int intel_pt_resample(struct intel_pt_decoder *decoder)
1893+
{
1894+
/* One-shot state: go back to regular decoding after this sample */
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1895+
decoder->state.type = INTEL_PT_INSTRUCTION;
1896+
/* Report the current ip as the sample address; there is no branch target */
decoder->state.from_ip = decoder->ip;
1897+
decoder->state.to_ip = 0;
1898+
return 0;
1899+
}
1900+
1901+
/*
 * Return codes of intel_pt_hop_trace(), acted upon by intel_pt_walk_trace():
 *   HOP_PROCESS - packet not handled here, fall through to the normal switch
 *   HOP_IGNORE  - drop the packet and fetch the next one
 *   HOP_RETURN  - return to the caller of intel_pt_walk_trace() with *err
 *   HOP_AGAIN   - a new packet is already loaded, re-examine it
 */
#define HOP_PROCESS 0
1902+
#define HOP_IGNORE 1
1903+
#define HOP_RETURN 2
1904+
#define HOP_AGAIN 3
1905+
1906+
/*
 * Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs.
 *
 * Examines the current packet (decoder->packet) and either consumes it,
 * producing at most one sample in decoder->state, or defers it to the regular
 * packet handling.
 *
 * @decoder: decoder state; ip, state, pkt_state and PSB bookkeeping may be
 *           updated
 * @no_tip:  in/out flag; when set, a FUP is reported on its own instead of
 *           being paired with a following TIP
 * @err:     out; set from intel_pt_walk_fup_tip() / intel_pt_walk_psbend()
 *           on the paths that call them, otherwise left untouched
 *
 * Returns one of the HOP_* codes above.
 */
static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
1908+
{
1909+
switch (decoder->packet.type) {
1910+
case INTEL_PT_TNT:
1911+
/* Taken/not-taken bits are useless without walking object code */
return HOP_IGNORE;
1912+
1913+
case INTEL_PT_TIP_PGD:
1914+
/* presumably count == 0 means the packet carries no ip - TODO confirm */
if (!decoder->packet.count)
1915+
return HOP_IGNORE;
1916+
intel_pt_set_ip(decoder);
1917+
/*
 * End of tracing: report the ip as a branch target only.
 * NOTE(review): this ORs into state.type whereas the other cases
 * assign it, and decoder->pge is not touched here - confirm both
 * are intended.
 */
decoder->state.type |= INTEL_PT_TRACE_END;
1918+
decoder->state.from_ip = 0;
1919+
decoder->state.to_ip = decoder->ip;
1920+
return HOP_RETURN;
1921+
1922+
case INTEL_PT_TIP:
1923+
if (!decoder->packet.count)
1924+
return HOP_IGNORE;
1925+
intel_pt_set_ip(decoder);
1926+
/* The TIP target is reported as an "instruction" sample */
decoder->state.type = INTEL_PT_INSTRUCTION;
1927+
decoder->state.from_ip = decoder->ip;
1928+
decoder->state.to_ip = 0;
1929+
return HOP_RETURN;
1930+
1931+
case INTEL_PT_FUP:
1932+
if (!decoder->packet.count)
1933+
return HOP_IGNORE;
1934+
intel_pt_set_ip(decoder);
1935+
/* A non-zero result from intel_pt_fup_event() ends handling here */
if (intel_pt_fup_event(decoder))
1936+
return HOP_RETURN;
1937+
/* Without branch tracing, do not wait for a TIP to follow the FUP */
if (!decoder->branch_enable)
1938+
*no_tip = true;
1939+
if (*no_tip) {
1940+
decoder->state.type = INTEL_PT_INSTRUCTION;
1941+
decoder->state.from_ip = decoder->ip;
1942+
decoder->state.to_ip = 0;
1943+
return HOP_RETURN;
1944+
}
1945+
/*
 * FUP expected to be followed by a TIP: let the regular FUP/TIP
 * walker run, then on success resample so the resulting ip is
 * also reported as an "instruction" sample.
 */
*err = intel_pt_walk_fup_tip(decoder);
1946+
if (!*err)
1947+
decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
1948+
return HOP_RETURN;
1949+
1950+
case INTEL_PT_PSB:
1951+
/* Reset last-ip tracking before walking the PSB+ packets */
decoder->last_ip = 0;
1952+
decoder->have_last_ip = true;
1953+
decoder->hop_psb_fup = false;
1954+
*err = intel_pt_walk_psbend(decoder);
1955+
/* -EAGAIN: caller re-examines the packet that is now current */
if (*err == -EAGAIN)
1956+
return HOP_AGAIN;
1957+
if (*err)
1958+
return HOP_RETURN;
1959+
/* hop_psb_fup is set by intel_pt_walk_psbend() when it saw a FUP with an ip */
if (decoder->hop_psb_fup) {
1960+
decoder->hop_psb_fup = false;
1961+
decoder->state.type = INTEL_PT_INSTRUCTION;
1962+
decoder->state.from_ip = decoder->ip;
1963+
decoder->state.to_ip = 0;
1964+
return HOP_RETURN;
1965+
}
1966+
/* Return with no sample type so the caller can see the changed CBR */
if (decoder->cbr != decoder->cbr_seen) {
1967+
decoder->state.type = 0;
1968+
return HOP_RETURN;
1969+
}
1970+
return HOP_IGNORE;
1971+
1972+
/* Every other packet type is left to the normal packet handling */
case INTEL_PT_BAD:
1973+
case INTEL_PT_PAD:
1974+
case INTEL_PT_TIP_PGE:
1975+
case INTEL_PT_TSC:
1976+
case INTEL_PT_TMA:
1977+
case INTEL_PT_MODE_EXEC:
1978+
case INTEL_PT_MODE_TSX:
1979+
case INTEL_PT_MTC:
1980+
case INTEL_PT_CYC:
1981+
case INTEL_PT_VMCS:
1982+
case INTEL_PT_PSBEND:
1983+
case INTEL_PT_CBR:
1984+
case INTEL_PT_TRACESTOP:
1985+
case INTEL_PT_PIP:
1986+
case INTEL_PT_OVF:
1987+
case INTEL_PT_MNT:
1988+
case INTEL_PT_PTWRITE:
1989+
case INTEL_PT_PTWRITE_IP:
1990+
case INTEL_PT_EXSTOP:
1991+
case INTEL_PT_EXSTOP_IP:
1992+
case INTEL_PT_MWAIT:
1993+
case INTEL_PT_PWRE:
1994+
case INTEL_PT_PWRX:
1995+
case INTEL_PT_BBP:
1996+
case INTEL_PT_BIP:
1997+
case INTEL_PT_BEP:
1998+
case INTEL_PT_BEP_IP:
1999+
default:
2000+
return HOP_PROCESS;
2001+
}
2002+
}
2003+
18782004
static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
18792005
{
18802006
bool no_tip = false;
@@ -1885,6 +2011,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
18852011
if (err)
18862012
return err;
18872013
next:
2014+
if (decoder->hop) {
2015+
switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
2016+
case HOP_IGNORE:
2017+
continue;
2018+
case HOP_RETURN:
2019+
return err;
2020+
case HOP_AGAIN:
2021+
goto next;
2022+
default:
2023+
break;
2024+
}
2025+
}
2026+
18882027
switch (decoder->packet.type) {
18892028
case INTEL_PT_TNT:
18902029
if (!decoder->packet.count)
@@ -1914,6 +2053,12 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
19142053
decoder->state.from_ip = 0;
19152054
decoder->state.to_ip = decoder->ip;
19162055
decoder->state.type |= INTEL_PT_TRACE_BEGIN;
2056+
/*
2057+
* In hop mode, resample to get the to_ip as an
2058+
* "instruction" sample.
2059+
*/
2060+
if (decoder->hop)
2061+
decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
19172062
return 0;
19182063
}
19192064

@@ -2033,7 +2178,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
20332178

20342179
case INTEL_PT_MODE_TSX:
20352180
/* MODE_TSX need not be followed by FUP */
2036-
if (!decoder->pge) {
2181+
if (!decoder->pge || decoder->in_psb) {
20372182
intel_pt_update_in_tx(decoder);
20382183
break;
20392184
}
@@ -2424,7 +2569,11 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
24242569
if (err)
24252570
return err;
24262571

2427-
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2572+
/* In hop mode, resample to get the to_ip as an "instruction" sample */
2573+
if (decoder->hop)
2574+
decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
2575+
else
2576+
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
24282577
decoder->overflow = false;
24292578

24302579
decoder->state.from_ip = 0;
@@ -2545,7 +2694,14 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
25452694

25462695
if (decoder->ip) {
25472696
decoder->state.type = 0; /* Do not have a sample */
2548-
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2697+
/*
2698+
* In hop mode, resample to get the PSB FUP ip as an
2699+
* "instruction" sample.
2700+
*/
2701+
if (decoder->hop)
2702+
decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
2703+
else
2704+
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
25492705
} else {
25502706
return intel_pt_sync_ip(decoder);
25512707
}
@@ -2609,6 +2765,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
26092765
if (err == -EAGAIN)
26102766
err = intel_pt_walk_trace(decoder);
26112767
break;
2768+
case INTEL_PT_STATE_RESAMPLE:
2769+
err = intel_pt_resample(decoder);
2770+
break;
26122771
default:
26132772
err = intel_pt_bug(decoder);
26142773
break;

tools/perf/util/intel-pt-decoder/intel-pt-decoder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ struct intel_pt_params {
250250
uint32_t tsc_ctc_ratio_n;
251251
uint32_t tsc_ctc_ratio_d;
252252
enum intel_pt_param_flags flags;
253+
unsigned int quick;
253254
};
254255

255256
struct intel_pt_decoder;

tools/perf/util/intel-pt.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
10301030
params.mtc_period = intel_pt_mtc_period(pt);
10311031
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
10321032
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
1033+
params.quick = pt->synth_opts.quick;
10331034

10341035
if (pt->filts.cnt > 0)
10351036
params.pgd_ip = intel_pt_pgd_ip;
@@ -1423,7 +1424,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
14231424

14241425
sample.id = ptq->pt->instructions_id;
14251426
sample.stream_id = ptq->pt->instructions_id;
1426-
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1427+
if (pt->synth_opts.quick)
1428+
sample.period = 1;
1429+
else
1430+
sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
14271431

14281432
sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
14291433
if (sample.cyc_cnt) {

0 commit comments

Comments
 (0)