|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +import argparse |
| 4 | +from pathlib import Path |
| 5 | +import logging |
| 6 | +import pydot |
| 7 | +import networkx as nx |
| 8 | +import sys |
| 9 | +from collections import defaultdict |
| 10 | + |
# Command-line interface: a directory of CFG .dot files (plus callgraph.txt)
# and a file listing the target locations.
parser = argparse.ArgumentParser()
parser.add_argument("cfg_dir", help="directory of CFG files and callgraph.txt")
parser.add_argument('targets_file', help="file containing <target_file>:<target_line> pairs")
args = parser.parse_args()

# All diagnostics go to a log file; stdout is reserved for the distance output.
logging.basicConfig(
    filename='cfg_preprocess.log',
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

cfg_dir = Path(args.cfg_dir)

# (target_file, target_line) pairs read from the targets file.
targets = []

# Use a context manager so the targets file is closed once it has been read.
with open(args.targets_file) as targets_fh:
    for line in targets_fh:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing empty line at end of file).
            continue
        # Split on the last ':' only, so a ':' inside the path cannot break
        # the <target_file>:<target_line> parsing.
        target_file, target_line = line.rsplit(':', 1)
        target_line = int(target_line)
        targets.append((target_file, target_line))

        print(target_file, target_line, file=sys.stderr)

for target in targets:
    logging.info(f'Target: {target[0]}:{target[1]}')
| 35 | + |
# Each line of callgraph.txt looks like
#   <file>:<caller function>:<lineno>:<order>:<bbid> <callee function>
# for example
#   simplified-lowering.cc:_ZN2v88internal8compiler22RepresentationSelector12EnqueueInputILNS1_5PhaseE0EEEvPNS1_4NodeEiNS1_7UseInfoE:0:0:384103392 _ZN2v88internal8compiler22RepresentationSelector7GetInfoEPNS1_4NodeE
def parse_callgraph(callgraph_txt):
    """Yield ``(caller, callee)`` call edges parsed from *callgraph_txt*.

    ``caller`` is the tuple ``(filename, funcname, lineno, order, bbid)``.
    ``filename``, ``funcname``, ``lineno`` and ``order`` are kept as the
    strings found in the file; ``bbid`` is the decimal id from the file
    converted to a hexadecimal string via ``hex()`` (e.g. ``'0xff'``).
    ``callee`` is the second whitespace-separated token of the line.

    Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line does not consist of exactly two
            whitespace-separated tokens, if the caller token has fewer than
            five ':'-separated fields, or if the block id is not an integer.
    """
    with open(callgraph_txt, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank (e.g. trailing) lines carry no call edge.
                continue
            caller, callee = fields
            # Split from the right so a ':' inside the file name cannot shift
            # the function / line / order / block-id fields.
            filename, funcname, lineno, order, bbid = caller.rsplit(':', 4)
            yield (filename, funcname, lineno, order, hex(int(bbid))), callee
| 45 | + |
# Graph nodes that match one of the requested targets; parse_cfg appends to
# this list as each CFG file is read.
target_nodes = []


# The 'order' attribute stored on every node is what later lets the script
# pick the first block of each function (function name -> first block id).
def parse_cfg(cfg_file):
    """Load one CFG .dot file and return it as an annotated networkx graph.

    Every edge gets ``weight`` 1. Every node's ``label`` attribute, stripped
    of surrounding braces and double quotes, is split on ':' into four fields
    that are stored on the node as ``filename``, ``funcname``, ``lineno``
    (int) and ``order`` (int).

    As a side effect, a node is appended to the module-level ``target_nodes``
    (once) when, for some entry of ``targets``, the node's filename is a
    substring of the target file and the line numbers are equal.
    """
    dot_graph = pydot.graph_from_dot_file(cfg_file)[0]
    cfg = nx.drawing.nx_pydot.from_pydot(dot_graph)

    # Intra-CFG edges all carry weight 1.
    for *_, edge_attrs in cfg.edges(keys=True, data=True):
        edge_attrs['weight'] = 1

    for node, attrs in cfg.nodes(data=True):
        fields = attrs['label'].strip('{}"').split(':')
        src_file, func, line_str, order_str = fields
        attrs['filename'] = src_file
        attrs['funcname'] = func
        attrs['lineno'] = int(line_str)
        attrs['order'] = int(order_str)

        # A node is recorded at most once, however many targets it matches.
        is_target = any(
            attrs['filename'] in wanted_file and attrs['lineno'] == wanted_line
            for wanted_file, wanted_line in targets
        )
        if is_target:
            target_nodes.append(node)
            logging.info(f'Found target node {node} in {cfg_file}')

    return cfg
| 74 | + |
callgraph = list(parse_callgraph(cfg_dir / 'callgraph.txt'))
cfgs = []
# Maps each CFG's graph name to its node with the smallest 'order' attribute.
func_to_node = {}
for f in cfg_dir.glob('*.dot'):
    g = parse_cfg(f)
    cfgs.append(g)

    if g.number_of_nodes() == 0:
        # min() raises ValueError on an empty sequence; a CFG without nodes
        # simply has no entry node to record.
        logging.warning(f'No nodes found in {f}; no entry node recorded')
        continue

    the_node = min(g.nodes(data=True), key=lambda n: n[1]['order'])
    func_to_node[g.name] = the_node[0]

logging.info(f'{len(target_nodes)} target nodes found')

logging.info(f'{len(callgraph)} call edges found in callgraph.txt')
logging.info(f'{len(cfgs)} CFG files found in {cfg_dir}')

node_cnt = sum(cfg.number_of_nodes() for cfg in cfgs)
edge_cnt = sum(cfg.number_of_edges() for cfg in cfgs)

logging.info(f'Total number of nodes: {node_cnt}')
logging.info(f'Total number of edges: {edge_cnt}')

# Merge the per-file CFGs into one graph. compose_all builds the union in a
# single pass instead of re-copying the growing graph once per CFG; it rejects
# an empty list, so fall back to an empty graph when no CFG was found.
entire_cfg = nx.compose_all(cfgs) if cfgs else nx.MultiDiGraph()

# Add one edge per call: from the calling basic block to the callee's first
# node. Call edges weigh 10, versus 1 for the edges inside a CFG.
for caller, callee in callgraph:
    filename, funcname, lineno, order, bbid = caller
    # CFG nodes are named 'Node' followed by the hexadecimal block id.
    node = 'Node' + bbid
    assert node in entire_cfg, f'Node {node} not found in entire CFG'
    entire_cfg.add_edge(node, func_to_node[callee], weight=10)

logging.info(f'Number of nodes in entire CFG: {entire_cfg.number_of_nodes()}')
logging.info(f'Number of edges in entire CFG: {entire_cfg.number_of_edges()}')

# For every node, collect its shortest-path distance with respect to each
# target it is connected to.
# NOTE(review): Dijkstra is run with each target as the *source*, so these are
# distances from the target to the node along edge direction. If distances
# from each node *to* the target were intended, this should run on
# entire_cfg.reverse(copy=False) instead -- confirm.
fulldistmap = defaultdict(list)
for v in target_nodes:
    distmap = nx.single_source_dijkstra_path_length(entire_cfg, v)
    for n, distance in distmap.items():
        fulldistmap[n].append(distance)

# Print "<block id> <harmonic mean of its distances>" for every reached node.
for n, distances in fulldistmap.items():
    # Drop the 'Node' prefix to recover the block id.
    bbid = n[4:]
    if 0 not in distances:
        harmonic_mean = len(distances) / sum(1/d for d in distances)
        print(f'{bbid} {harmonic_mean}')
    else:
        # A zero distance means the node is itself a target; the harmonic
        # mean is undefined there (division by zero), so report 0.0.
        print(f'{bbid} 0.0')
| 128 | + |
0 commit comments