|
| 1 | +#!/usr/bin/env python |
| 2 | +import argparse |
| 3 | +import json |
| 4 | +import os |
| 5 | +import logging |
| 6 | +import pandas as pd |
| 7 | + |
# Process the chrome traces output by the PyTorch profiler.
# Requires each JSON input file's name to be in the format {model_name}_chrome_trace_*.json.
# The runtimes file is a CSV with columns (name, runtime).
| 11 | + |
| 12 | +gpu_pids = [] |
| 13 | + |
def is_gpu_compute_event(event):
    """Return True if *event* is a complete-phase ("X") event from a known GPU pid."""
    if "pid" not in event or "ph" not in event:
        return False
    return event["pid"] in gpu_pids and event["ph"] == "X"
| 17 | + |
def get_events(filename):
    """Load a chrome trace JSON file and return its list of trace events.

    Fix: the original opened the file without ever closing it; use a
    context manager so the handle is released even if json.load raises.
    """
    with open(filename) as f:
        data = json.load(f)
    return data["traceEvents"]
| 23 | + |
def get_sorted_gpu_events(events):
    """Filter *events* down to GPU compute events, sorted by start timestamp."""
    gpu_events = [event for event in events if is_gpu_compute_event(event)]
    gpu_events.sort(key=lambda event: event["ts"])
    return gpu_events
| 31 | + |
def get_sorted_gpu_mm_conv_events(events):
    """Return the sorted GPU events whose kernel names look like matmul/conv work."""
    # substrings that mark matmul/convolution kernels in kernel names
    keywords = ("gemm", "conv", "cutlass", "wgrad")

    def is_mm_conv_event(event):
        return "name" in event and any(k in event["name"] for k in keywords)

    return [event for event in get_sorted_gpu_events(events) if is_mm_conv_event(event)]
| 43 | + |
def get_duration(sorted_gpu_events):
    """Return the total wall-clock time covered by at least one event.

    Overlapping events are merged (an overlapped interval is counted once).
    Events must be sorted ascending by start timestamp "ts"; durations come
    from "dur", in the same (microsecond) units as "ts".

    Fix: the original raised IndexError on an empty list; a trace with no
    matching events now contributes 0.
    """
    if not sorted_gpu_events:
        return 0
    first = sorted_gpu_events[0]
    current_end_time = first["ts"] + first["dur"]
    total_duration = first["dur"]
    for event in sorted_gpu_events[1:]:
        # count only the part of this event past what is already covered
        start_time = max(event["ts"], current_end_time)
        end_time = event["ts"] + event["dur"]
        total_duration += max(end_time - start_time, 0)
        current_end_time = max(current_end_time, end_time)
    return total_duration
| 54 | + |
def get_model_name(filename):
    """Extract the model name from a path of form .../{model_name}_chrome_trace_*.json."""
    basename = os.path.basename(filename)
    return basename[:basename.find("_chrome_trace")]
| 59 | + |
def get_total_length(run_times_df, modelname):
    """Look up *modelname*'s runtime in the runtimes DataFrame.

    run_times_df must have "name" and "runtime" columns (as loaded by
    pd.read_csv in main). Returns the runtime as a float; raises IndexError
    if the model is not present.

    Fix: float(Series) is deprecated in modern pandas (and raises for
    non-single-element selections); select the scalar explicitly instead.
    """
    matches = run_times_df.loc[run_times_df["name"] == modelname, "runtime"]
    return float(matches.iloc[0])
| 62 | + |
def main():
    """Parse CLI args, then print per-model GPU utilization and mm/conv time share.

    Output is CSV-ish lines: "modelname, utilization, mm_conv_utilization",
    one per processed trace file.

    Fixes vs. original:
    - the no-args branch fell through and then hit a NameError on
      ``filenames``; it now returns after the message,
    - ``gpu_pids`` was never cleared between files, so GPU pids from one
      trace leaked into the next; it is reset per file,
    - the bare ``except:`` is narrowed to ``Exception`` and the error line
      now names the failing file instead of "(unknown)".
    """
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    parser.add_argument(
        "--runtime", "-runf", help="file name of the runtime file", required=True
    )
    group.add_argument(
        "--filename", "-f", action="append", help="a filename of the json file to process"
    )
    group.add_argument(
        "--folder", "-fd", help="a folder of the json files to process"
    )
    args = parser.parse_args()

    run_times_df = pd.read_csv(args.runtime)

    if args.filename:
        filenames = args.filename
    elif args.folder:
        # collect every .json file directly inside the folder (non-recursive)
        filenames = [
            os.path.join(args.folder, name)
            for name in os.listdir(args.folder)
            if os.path.isfile(os.path.join(args.folder, name))
            and name.endswith(".json")
        ]
    else:
        # unreachable while the argparse group is required=True, but stay defensive
        print("Please provide a filename or a folder name")
        return

    print("modelname, GPU Utilization, MM and Conv time")

    global gpu_pids
    for filename in filenames:
        try:
            events = get_events(filename)

            # reset per file: pids from a previous trace must not leak into this one
            gpu_pids = []
            for event in events:
                if "name" not in event:
                    continue
                if event["name"] == 'process_labels' and "GPU" in event["args"]["labels"]:
                    gpu_pids.append(event["pid"])

            modelname = get_model_name(filename)
            # runtimes file stores seconds; trace timestamps/durations are microseconds
            total_length = get_total_length(run_times_df, modelname) * 1e6

            sorted_gpu_events = get_sorted_gpu_events(events)
            utilization = get_duration(sorted_gpu_events) / total_length

            sorted_gpu_mm_conv_events = get_sorted_gpu_mm_conv_events(events)
            mm_conv_utilization = get_duration(sorted_gpu_mm_conv_events) / total_length

            print(f"{modelname}, {utilization}, {mm_conv_utilization}")
        except Exception:
            logging.exception("failed to process %s", filename)
            print(f"{filename}, ERROR")
| 118 | + |
| 119 | + |
| 120 | +if __name__ == "__main__": |
| 121 | + main() |
0 commit comments