Skip to content

Commit fdd0f81

Browse files
Petar Gligoricacmel
authored andcommitted
perf script: task-analyzer add csv support
This patch adds the possibility to write the trace and the summary as csv files to a user specified file. A format as such simplifies further data processing. This is achieved by having ";" as separators instead of spaces and solely one header per file. Additional parameters are being considered, like in the normal usage of the script. Colors are turned off in the case of a csv output, thus the highlight option is also being ignored. Usage: Write standard task to csv file: $ perf script report tasks-analyzer --csv <file> write limited output to csv file in nanoseconds: $ perf script report tasks-analyzer --csv <file> --ns --limit-to-tasks 1337 Write summary to a csv file: $ perf script report tasks-analyzer --csv-summary <file> Write summary to csv file with additional schedule information: $ perf script report tasks-analyzer --csv-summary <file> --summary-extended Write both summary and standard task to a csv file: $ perf script report tasks-analyzer --csv --csv-summary The following examples illustrate what is possible with the CSV output. The first command sequence will record all scheduler switch events for 10 seconds, the task-analyzer calculates task information like runtimes as CSV. A small python snippet using pandas and matplotlib will visualize the most frequent task (e.g. kworker/1:1) runtimes - each runtime as a bar in a bar chart: $ perf record -e sched:sched_switch -a -- sleep 10 $ perf script report tasks-analyzer --ns --csv tasks.csv $ cat << EOF > /tmp/freq-comm-runtimes-bar.py import pandas as pd import matplotlib.pyplot as plt df = pd.read_csv("tasks.csv", sep=';') most_freq_comm = df["COMM"].value_counts().idxmax() most_freq_runtimes = df[df["COMM"]==most_freq_comm]["Runtime"] plt.title(f"Runtimes for Task {most_freq_comm} in Nanoseconds") plt.bar(range(len(most_freq_runtimes)), most_freq_runtimes) plt.show() $ python3 /tmp/freq-comm-runtimes-bar.py As a seconds example, the subsequent script generates a pie chart of all accumulated tasks runtimes for 10 seconds of system recordings: $ perf record -e sched:sched_switch -a -- sleep 10 $ perf script report tasks-analyzer --csv-summary task-summary.csv $ cat << EOF > /tmp/accumulated-task-pie.py import pandas as pd from matplotlib.pyplot import pie, axis, show df = pd.read_csv("task-summary.csv", sep=';') sums = df.groupby(df["Comm"])["Accumulated"].sum() axis("equal") pie(sums, labels=sums.index); show() EOF $ python3 /tmp/accumulated-task-pie.py A variety of other visualizations are possible in matplotlib and other environments. Of course, pandas, numpy and co. also allow easy statistical analysis of the data! Signed-off-by: Petar Gligoric <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Ian Rogers <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Namhyung Kim <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Hagen Paul Pfeifer <[email protected]> Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent e76aff0 commit fdd0f81

File tree

1 file changed

+185
-89
lines changed

1 file changed

+185
-89
lines changed

tools/perf/scripts/python/task-analyzer.py

Lines changed: 185 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,18 @@ def _parse_args():
156156
help="always, never or auto, allowing configuring color output"
157157
" via the command line",
158158
)
159+
parser.add_argument(
160+
"--csv",
161+
default="",
162+
help="Write trace to file selected by user. Options, like --ns or --extended"
163+
"-times are used.",
164+
)
165+
parser.add_argument(
166+
"--csv-summary",
167+
default="",
168+
help="Write summary to file selected by user. Options, like --ns or"
169+
" --summary-extended are used.",
170+
)
159171
args = parser.parse_args()
160172
args.tid_renames = dict()
161173

@@ -275,7 +287,6 @@ def _update_max_entries(self):
275287

276288

277289

278-
279290
class Summary(object):
280291
"""
281292
Primary instance for calculating the summary output. Processes the whole trace to
@@ -327,7 +338,7 @@ def _print_header(self):
327338
sum(db["inter_times"].values()) - 4 * decimal_precision
328339
)
329340
_header += ("Max Inter Task Times",)
330-
print(fmt.format(*_header))
341+
fd_sum.write(fmt.format(*_header) + "\n")
331342

332343
def _column_titles(self):
333344
"""
@@ -336,34 +347,58 @@ def _column_titles(self):
336347
values are being displayed in grey. Thus in their format two additional {},
337348
are placed for color set and reset.
338349
"""
350+
separator, fix_csv_align = _prepare_fmt_sep()
339351
decimal_precision, time_precision = _prepare_fmt_precision()
340-
fmt = " {{:>{}}}".format(db["task_info"]["pid"])
341-
fmt += " {{:>{}}}".format(db["task_info"]["tid"])
342-
fmt += " {{:>{}}}".format(db["task_info"]["comm"])
343-
fmt += " {{:>{}}}".format(db["runtime_info"]["runs"])
344-
fmt += " {{:>{}}}".format(db["runtime_info"]["acc"])
345-
fmt += " {{:>{}}}".format(db["runtime_info"]["mean"])
346-
fmt += " {{:>{}}}".format(db["runtime_info"]["median"])
347-
fmt += " {{:>{}}}".format(db["runtime_info"]["min"] - decimal_precision)
348-
fmt += " {{:>{}}}".format(db["runtime_info"]["max"] - decimal_precision)
349-
fmt += " {{}}{{:>{}}}{{}}".format(db["runtime_info"]["max_at"] - time_precision)
352+
fmt = "{{:>{}}}".format(db["task_info"]["pid"] * fix_csv_align)
353+
fmt += "{}{{:>{}}}".format(separator, db["task_info"]["tid"] * fix_csv_align)
354+
fmt += "{}{{:>{}}}".format(separator, db["task_info"]["comm"] * fix_csv_align)
355+
fmt += "{}{{:>{}}}".format(separator, db["runtime_info"]["runs"] * fix_csv_align)
356+
fmt += "{}{{:>{}}}".format(separator, db["runtime_info"]["acc"] * fix_csv_align)
357+
fmt += "{}{{:>{}}}".format(separator, db["runtime_info"]["mean"] * fix_csv_align)
358+
fmt += "{}{{:>{}}}".format(
359+
separator, db["runtime_info"]["median"] * fix_csv_align
360+
)
361+
fmt += "{}{{:>{}}}".format(
362+
separator, (db["runtime_info"]["min"] - decimal_precision) * fix_csv_align
363+
)
364+
fmt += "{}{{:>{}}}".format(
365+
separator, (db["runtime_info"]["max"] - decimal_precision) * fix_csv_align
366+
)
367+
fmt += "{}{{}}{{:>{}}}{{}}".format(
368+
separator, (db["runtime_info"]["max_at"] - time_precision) * fix_csv_align
369+
)
350370

351371
column_titles = ("PID", "TID", "Comm")
352372
column_titles += ("Runs", "Accumulated", "Mean", "Median", "Min", "Max")
353-
column_titles += (_COLORS["grey"], "At", _COLORS["reset"])
373+
column_titles += (_COLORS["grey"], "Max At", _COLORS["reset"])
354374

355375
if args.summary_extended:
356-
fmt += " {{:>{}}}".format(db["inter_times"]["out_in"] - decimal_precision)
357-
fmt += " {{}}{{:>{}}}{{}}".format(
358-
db["inter_times"]["inter_at"] - time_precision
376+
fmt += "{}{{:>{}}}".format(
377+
separator,
378+
(db["inter_times"]["out_in"] - decimal_precision) * fix_csv_align
379+
)
380+
fmt += "{}{{}}{{:>{}}}{{}}".format(
381+
separator,
382+
(db["inter_times"]["inter_at"] - time_precision) * fix_csv_align
383+
)
384+
fmt += "{}{{:>{}}}".format(
385+
separator,
386+
(db["inter_times"]["out_out"] - decimal_precision) * fix_csv_align
387+
)
388+
fmt += "{}{{:>{}}}".format(
389+
separator,
390+
(db["inter_times"]["in_in"] - decimal_precision) * fix_csv_align
391+
)
392+
fmt += "{}{{:>{}}}".format(
393+
separator,
394+
(db["inter_times"]["in_out"] - decimal_precision) * fix_csv_align
359395
)
360-
fmt += " {{:>{}}}".format(db["inter_times"]["out_out"] - decimal_precision)
361-
fmt += " {{:>{}}}".format(db["inter_times"]["in_in"] - decimal_precision)
362-
fmt += " {{:>{}}}".format(db["inter_times"]["in_out"] - decimal_precision)
363396

364397
column_titles += ("Out-In", _COLORS["grey"], "Max At", _COLORS["reset"],
365398
"Out-Out", "In-In", "In-Out")
366-
print(fmt.format(*column_titles))
399+
400+
fd_sum.write(fmt.format(*column_titles) + "\n")
401+
367402

368403
def _task_stats(self):
369404
"""calculates the stats of every task and constructs the printable summary"""
@@ -414,39 +449,53 @@ def _task_stats(self):
414449
self._calc_alignments_summary(align_helper)
415450

416451
def _format_stats(self):
452+
separator, fix_csv_align = _prepare_fmt_sep()
417453
decimal_precision, time_precision = _prepare_fmt_precision()
418-
fmt = " {{:>{}d}}".format(db["task_info"]["pid"])
419-
fmt += " {{:>{}d}}".format(db["task_info"]["tid"])
420-
fmt += " {{:>{}}}".format(db["task_info"]["comm"])
421-
fmt += " {{:>{}d}}".format(db["runtime_info"]["runs"])
422-
fmt += " {{:>{}.{}f}}".format(db["runtime_info"]["acc"], time_precision)
423-
fmt += " {{}}{{:>{}.{}f}}".format(db["runtime_info"]["mean"], time_precision)
424-
fmt += " {{:>{}.{}f}}".format(db["runtime_info"]["median"], time_precision)
425-
fmt += " {{:>{}.{}f}}".format(
426-
db["runtime_info"]["min"] - decimal_precision, time_precision
427-
)
428-
fmt += " {{:>{}.{}f}}".format(
429-
db["runtime_info"]["max"] - decimal_precision, time_precision
430-
)
431-
fmt += " {{}}{{:>{}.{}f}}{{}}{{}}".format(
432-
db["runtime_info"]["max_at"] - time_precision, decimal_precision
454+
len_pid = db["task_info"]["pid"] * fix_csv_align
455+
len_tid = db["task_info"]["tid"] * fix_csv_align
456+
len_comm = db["task_info"]["comm"] * fix_csv_align
457+
len_runs = db["runtime_info"]["runs"] * fix_csv_align
458+
len_acc = db["runtime_info"]["acc"] * fix_csv_align
459+
len_mean = db["runtime_info"]["mean"] * fix_csv_align
460+
len_median = db["runtime_info"]["median"] * fix_csv_align
461+
len_min = (db["runtime_info"]["min"] - decimal_precision) * fix_csv_align
462+
len_max = (db["runtime_info"]["max"] - decimal_precision) * fix_csv_align
463+
len_max_at = (db["runtime_info"]["max_at"] - time_precision) * fix_csv_align
464+
if args.summary_extended:
465+
len_out_in = (
466+
db["inter_times"]["out_in"] - decimal_precision
467+
) * fix_csv_align
468+
len_inter_at = (
469+
db["inter_times"]["inter_at"] - time_precision
470+
) * fix_csv_align
471+
len_out_out = (
472+
db["inter_times"]["out_out"] - decimal_precision
473+
) * fix_csv_align
474+
len_in_in = (db["inter_times"]["in_in"] - decimal_precision) * fix_csv_align
475+
len_in_out = (
476+
db["inter_times"]["in_out"] - decimal_precision
477+
) * fix_csv_align
478+
479+
fmt = "{{:{}d}}".format(len_pid)
480+
fmt += "{}{{:{}d}}".format(separator, len_tid)
481+
fmt += "{}{{:>{}}}".format(separator, len_comm)
482+
fmt += "{}{{:{}d}}".format(separator, len_runs)
483+
fmt += "{}{{:{}.{}f}}".format(separator, len_acc, time_precision)
484+
fmt += "{}{{}}{{:{}.{}f}}".format(separator, len_mean, time_precision)
485+
fmt += "{}{{:{}.{}f}}".format(separator, len_median, time_precision)
486+
fmt += "{}{{:{}.{}f}}".format(separator, len_min, time_precision)
487+
fmt += "{}{{:{}.{}f}}".format(separator, len_max, time_precision)
488+
fmt += "{}{{}}{{:{}.{}f}}{{}}{{}}".format(
489+
separator, len_max_at, decimal_precision
433490
)
434491
if args.summary_extended:
435-
fmt += " {{:>{}.{}f}}".format(
436-
db["inter_times"]["out_in"] - decimal_precision, time_precision
437-
)
438-
fmt += " {{}}{{:>{}.{}f}}{{}}".format(
439-
db["inter_times"]["inter_at"] - time_precision, decimal_precision
440-
)
441-
fmt += " {{:>{}.{}f}}".format(
442-
db["inter_times"]["out_out"] - decimal_precision, time_precision
443-
)
444-
fmt += " {{:>{}.{}f}}".format(
445-
db["inter_times"]["in_in"] - decimal_precision, time_precision
446-
)
447-
fmt += " {{:>{}.{}f}}".format(
448-
db["inter_times"]["in_out"] - decimal_precision, time_precision
492+
fmt += "{}{{:{}.{}f}}".format(separator, len_out_in, time_precision)
493+
fmt += "{}{{}}{{:{}.{}f}}{{}}".format(
494+
separator, len_inter_at, decimal_precision
449495
)
496+
fmt += "{}{{:{}.{}f}}".format(separator, len_out_out, time_precision)
497+
fmt += "{}{{:{}.{}f}}".format(separator, len_in_in, time_precision)
498+
fmt += "{}{{:{}.{}f}}".format(separator, len_in_out, time_precision)
450499
return fmt
451500

452501

@@ -467,13 +516,15 @@ def _calc_alignments_summary(self, align_helper):
467516

468517

469518
def print(self):
470-
print("\nSummary")
471519
self._task_stats()
472-
self._print_header()
473-
self._column_titles()
474520
fmt = self._format_stats()
521+
522+
if not args.csv_summary:
523+
print("\nSummary")
524+
self._print_header()
525+
self._column_titles()
475526
for i in range(len(self._body)):
476-
print(fmt.format(*tuple(self._body[i])))
527+
fd_sum.write(fmt.format(*tuple(self._body[i])) + "\n")
477528

478529

479530

@@ -531,37 +582,45 @@ def _filter_non_printable(unfiltered):
531582

532583

533584
def _fmt_header():
534-
fmt = "{{:>{}}}".format(LEN_SWITCHED_IN)
535-
fmt += " {{:>{}}}".format(LEN_SWITCHED_OUT)
536-
fmt += " {{:>{}}}".format(LEN_CPU)
537-
fmt += " {{:>{}}}".format(LEN_PID)
538-
fmt += " {{:>{}}}".format(LEN_TID)
539-
fmt += " {{:>{}}}".format(LEN_COMM)
540-
fmt += " {{:>{}}}".format(LEN_RUNTIME)
541-
fmt += " {{:>{}}}".format(LEN_OUT_IN)
585+
separator, fix_csv_align = _prepare_fmt_sep()
586+
fmt = "{{:>{}}}".format(LEN_SWITCHED_IN*fix_csv_align)
587+
fmt += "{}{{:>{}}}".format(separator, LEN_SWITCHED_OUT*fix_csv_align)
588+
fmt += "{}{{:>{}}}".format(separator, LEN_CPU*fix_csv_align)
589+
fmt += "{}{{:>{}}}".format(separator, LEN_PID*fix_csv_align)
590+
fmt += "{}{{:>{}}}".format(separator, LEN_TID*fix_csv_align)
591+
fmt += "{}{{:>{}}}".format(separator, LEN_COMM*fix_csv_align)
592+
fmt += "{}{{:>{}}}".format(separator, LEN_RUNTIME*fix_csv_align)
593+
fmt += "{}{{:>{}}}".format(separator, LEN_OUT_IN*fix_csv_align)
542594
if args.extended_times:
543-
fmt += " {{:>{}}}".format(LEN_OUT_OUT)
544-
fmt += " {{:>{}}}".format(LEN_IN_IN)
545-
fmt += " {{:>{}}}".format(LEN_IN_OUT)
595+
fmt += "{}{{:>{}}}".format(separator, LEN_OUT_OUT*fix_csv_align)
596+
fmt += "{}{{:>{}}}".format(separator, LEN_IN_IN*fix_csv_align)
597+
fmt += "{}{{:>{}}}".format(separator, LEN_IN_OUT*fix_csv_align)
546598
return fmt
547599

548600

549601
def _fmt_body():
602+
separator, fix_csv_align = _prepare_fmt_sep()
550603
decimal_precision, time_precision = _prepare_fmt_precision()
551-
fmt = "{{}}{{:{}.{}f}}".format(LEN_SWITCHED_IN, decimal_precision)
552-
fmt += " {{:{}.{}f}}".format(LEN_SWITCHED_OUT, decimal_precision)
553-
fmt += " {{:{}d}}".format(LEN_CPU)
554-
fmt += " {{:{}d}}".format(LEN_PID)
555-
fmt += " {{}}{{:{}d}}{{}}".format(LEN_TID)
556-
fmt += " {{}}{{:>{}}}".format(LEN_COMM)
557-
fmt += " {{:{}.{}f}}".format(LEN_RUNTIME, time_precision)
604+
fmt = "{{}}{{:{}.{}f}}".format(LEN_SWITCHED_IN*fix_csv_align, decimal_precision)
605+
fmt += "{}{{:{}.{}f}}".format(
606+
separator, LEN_SWITCHED_OUT*fix_csv_align, decimal_precision
607+
)
608+
fmt += "{}{{:{}d}}".format(separator, LEN_CPU*fix_csv_align)
609+
fmt += "{}{{:{}d}}".format(separator, LEN_PID*fix_csv_align)
610+
fmt += "{}{{}}{{:{}d}}{{}}".format(separator, LEN_TID*fix_csv_align)
611+
fmt += "{}{{}}{{:>{}}}".format(separator, LEN_COMM*fix_csv_align)
612+
fmt += "{}{{:{}.{}f}}".format(separator, LEN_RUNTIME*fix_csv_align, time_precision)
558613
if args.extended_times:
559-
fmt += " {{:{}.{}f}}".format(LEN_OUT_IN, time_precision)
560-
fmt += " {{:{}.{}f}}".format(LEN_OUT_OUT, time_precision)
561-
fmt += " {{:{}.{}f}}".format(LEN_IN_IN, time_precision)
562-
fmt += " {{:{}.{}f}}{{}}".format(LEN_IN_OUT, time_precision)
614+
fmt += "{}{{:{}.{}f}}".format(separator, LEN_OUT_IN*fix_csv_align, time_precision)
615+
fmt += "{}{{:{}.{}f}}".format(separator, LEN_OUT_OUT*fix_csv_align, time_precision)
616+
fmt += "{}{{:{}.{}f}}".format(separator, LEN_IN_IN*fix_csv_align, time_precision)
617+
fmt += "{}{{:{}.{}f}}{{}}".format(
618+
separator, LEN_IN_OUT*fix_csv_align, time_precision
619+
)
563620
else:
564-
fmt += " {{:{}.{}f}}{{}}".format(LEN_OUT_IN, time_precision)
621+
fmt += "{}{{:{}.{}f}}{{}}".format(
622+
separator, LEN_OUT_IN*fix_csv_align, time_precision
623+
)
565624
return fmt
566625

567626

@@ -571,7 +630,8 @@ def _print_header():
571630
"Time Out-In")
572631
if args.extended_times:
573632
header += ("Time Out-Out", "Time In-In", "Time In-Out")
574-
print(fmt.format(*header))
633+
fd_task.write(fmt.format(*header) + "\n")
634+
575635

576636

577637
def _print_task_finish(task):
@@ -583,7 +643,6 @@ def _print_task_finish(task):
583643
in_in = -1
584644
in_out = -1
585645
fmt = _fmt_body()
586-
587646
# depending on user provided highlight option we change the color
588647
# for particular tasks
589648
if str(task.tid) in args.highlight_tasks_map:
@@ -612,16 +671,22 @@ def _print_task_finish(task):
612671
out_out = timespan_gap_tid.out_out
613672
in_in = timespan_gap_tid.in_in
614673
in_out = timespan_gap_tid.in_out
674+
675+
615676
if args.extended_times:
616-
print(fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu, task.pid,
617-
c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
677+
line_out = fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu,
678+
task.pid, c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
618679
task.runtime(time_unit), out_in, out_out, in_in, in_out,
619-
c_row_reset))
680+
c_row_reset) + "\n"
620681
else:
621-
print(fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu, task.pid,
622-
c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
623-
task.runtime(time_unit), out_in, c_row_reset))
624-
682+
line_out = fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu,
683+
task.pid, c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
684+
task.runtime(time_unit), out_in, c_row_reset) + "\n"
685+
try:
686+
fd_task.write(line_out)
687+
except(IOError):
688+
# don't mangle the output if user SIGINT this script
689+
sys.exit()
625690

626691
def _record_cleanup(_list):
627692
"""
@@ -733,10 +798,19 @@ def _argument_filter_sanity_check():
733798
)
734799
if args.time_limit and (args.summary or args.summary_only or args.summary_extended):
735800
sys.exit("Error: Cannot set time limit and print summary")
736-
801+
if args.csv_summary:
802+
args.summary = True
803+
if args.csv == args.csv_summary:
804+
sys.exit("Error: Chosen files for csv and csv summary are the same")
805+
if args.csv and (args.summary_extended or args.summary) and not args.csv_summary:
806+
sys.exit("Error: No file chosen to write summary to. Choose with --csv-summary "
807+
"<file>")
808+
if args.csv and args.summary_only:
809+
sys.exit("Error: --csv chosen and --summary-only. Standard task would not be"
810+
"written to csv file.")
737811

738812
def _argument_prepare_check():
739-
global time_unit
813+
global time_unit, fd_task, fd_sum
740814
if args.filter_tasks:
741815
args.filter_tasks = args.filter_tasks.split(",")
742816
if args.limit_to_tasks:
@@ -769,6 +843,21 @@ def _argument_prepare_check():
769843
time_unit = "ms"
770844

771845

846+
fd_task = sys.stdout
847+
if args.csv:
848+
args.stdio_color = "never"
849+
fd_task = open(args.csv, "w")
850+
print("generating csv at",args.csv,)
851+
852+
fd_sum = sys.stdout
853+
if args.csv_summary:
854+
args.stdio_color = "never"
855+
fd_sum = open(args.csv_summary, "w")
856+
print("generating csv summary at",args.csv_summary)
857+
if not args.csv:
858+
args.summary_only = True
859+
860+
772861
def _is_within_timelimit(time):
773862
"""
774863
Check if a time limit was given by parameter, if so ignore the rest. If not,
@@ -801,10 +890,17 @@ def _prepare_fmt_precision():
801890
decimal_precision = 6
802891
time_precision = 3
803892
if args.ns:
804-
decimal_precision = 9
805-
time_precision = 0
893+
decimal_precision = 9
894+
time_precision = 0
806895
return decimal_precision, time_precision
807896

897+
def _prepare_fmt_sep():
898+
separator = " "
899+
fix_csv_align = 1
900+
if args.csv or args.csv_summary:
901+
separator = ";"
902+
fix_csv_align = 0
903+
return separator, fix_csv_align
808904

809905
def trace_unhandled(event_name, context, event_fields_dict, perf_sample_dict):
810906
pass

0 commit comments

Comments
 (0)