|
| 1 | +#!/usr/bin/env python |
| 2 | +#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| 3 | +# |
| 4 | +# The LLVM Compiler Infrastructure |
| 5 | +# |
| 6 | +# This file is distributed under the University of Illinois Open Source |
| 7 | +# License. See LICENSE.TXT for details. |
| 8 | +# |
| 9 | +#===------------------------------------------------------------------------===# |
| 10 | +import bisect |
| 11 | +import os |
| 12 | +import re |
| 13 | +import subprocess |
| 14 | +import sys |
| 15 | + |
# Module-wide state.
# DEBUG: when true, the symbolizers echo their command lines and inputs.
DEBUG = False
# symbolizers: cache of symbolizer chains, keyed by binary path.
symbolizers = dict()
# The remaining names are not referenced by the code visible in this file.
llvm_symbolizer = None
filetypes = dict()
vmaddrs = dict()
| 21 | + |
| 22 | + |
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a source location string before it is printed.

    Every extra command-line argument (sys.argv[1:]) is treated as a regular
    expression fragment: the text up to and including its match is removed.
    Afterwards, locations inside asan_*.cc files are replaced by
    '_asan_rtl_' and 'crtstuff.c:0' locations by '???:0'.

    Args:
      file_name: location string to rewrite.
    Returns:
      the rewritten location string.
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    # Fixed rewrites, applied in this order.
    rewrites = (
        ('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_'),
        ('.*crtstuff.c:0', '???:0'),
    )
    for pattern, replacement in rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
| 30 | + |
| 31 | + |
class Symbolizer(object):
    """Common interface of all symbolizers; this base class resolves nothing."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Subclasses override this method.

        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this instruction.
          offset: instruction offset in the @binary.
        Returns:
          list of strings (one string for each inlined frame) describing
          the code locations for this instruction (that is, function name,
          file name, line and column numbers). The base implementation
          always returns None.
        """
        return None
| 50 | + |
| 51 | + |
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one long-running llvm-symbolizer process.

    Requests are written to the child's stdin as '<binary> <offset>' lines.
    The reply is read from its stdout as pairs of lines (function name, then
    file location), terminated by an empty line.
    """

    def __init__(self, symbolizer_path):
        """Remember the executable path and try to start the child process.

        Args:
          symbolizer_path: path or bare command name of llvm-symbolizer.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer.

        Returns:
          the subprocess.Popen object, or None if the process could not be
          started (for example, the executable was not found).
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=false',
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Let Popen resolve the executable: an os.path.exists() check
            # would reject a bare command name that is only reachable via
            # PATH. Text mode and an unbuffered pipe keep the line-based
            # protocol working on both Python 2 and Python 3.
            return subprocess.Popen(cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    bufsize=0,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          list of '<addr> in <function> <file>' strings, one per frame
          reported by llvm-symbolizer, or None if the process is not running,
          the exchange failed, or no valid frame was reported.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # Make sure the request reaches the child before we block on
            # reading its answer.
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    # Empty line (or EOF) ends the reply.
                    break
                file_name = fix_filename(self.pipe.stdout.readline().rstrip())
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberate best effort: on any failure report "no result" so
            # that the caller can fall back to another symbolizer.
            result = []
        return result or None
| 98 | + |
| 99 | + |
def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer.

    The executable is taken from the LLVM_SYMBOLIZER_PATH environment
    variable; if that is unset or empty, the bare name 'llvm-symbolizer' is
    used, i.e. it is assumed to be in PATH.

    Args:
      system: accepted for interface compatibility; not used here.
    """
    return LLVMSymbolizer(
        os.getenv('LLVM_SYMBOLIZER_PATH') or 'llvm-symbolizer')
| 106 | + |
| 107 | + |
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that keeps one `addr2line -f -e <binary>` process open."""

    def __init__(self, binary):
        """Start addr2line for the given binary.

        Args:
          binary: path of the executable/shared object to symbolize.
        """
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Spawn addr2line and return the subprocess.Popen object."""
        cmd = ['addr2line', '-f', '-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text mode and an unbuffered pipe: each offset written must reach
        # addr2line immediately, and str I/O must work on Python 3 as well.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None if @binary is not the binary this object was created for;
          otherwise a one-element list '<addr> in <function> <file>'. If the
          exchange with addr2line fails, function and file are empty.
        """
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            # Push the request out before blocking on the reply.
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Deliberate best effort: still produce a frame line.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]
| 135 | + |
| 136 | + |
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that runs `atos` once per request."""

    def __init__(self, addr, binary):
        """Record the binary and guess the architecture.

        Args:
          addr: a textual address, used only to guess the architecture.
          binary: path of the executable/shared object to symbolize.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        if len(addr) > 10:
            self.arch = 'x86_64'
        else:
            self.arch = 'i386'
        self.vmaddr = None
        self.pipe = None

    def write_addr_to_pipe(self, offset):
        """Write @offset (a hex string) to atos as a normalized 0x... line."""
        self.pipe.stdin.write('0x%x' % int(offset, 16))
        self.pipe.stdin.write('\n')

    def open_atos(self):
        """Spawn a fresh atos process and store it in self.pipe."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        # Text mode so that str can be written/read on Python 3 as well.
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None if @binary is not the binary this object was created for;
          otherwise a one-element list. When the atos reply is well-formed
          the element is '<addr> in <function> <file>', else it is
          '<addr> in <raw first line of the atos reply>'.
        """
        if self.binary != binary:
            return None
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() flushes and closes stdin, drains stdout and stderr
        # and waits for the child, so the per-call process is reaped and its
        # pipes are released instead of being leaked.
        output, _ = self.pipe.communicate()
        reply_lines = output.splitlines()
        atos_line = reply_lines[0].rstrip() if reply_lines else ''
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line: {0}'.format(atos_line))
        if match:
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', match.group(1))
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        return ['%s in %s' % (addr, atos_line)]
| 182 | + |
| 183 | + |
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first result."""

    def __init__(self, symbolizer_list):
        """Store the list of symbolizers to consult; entries may be None."""
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          the first non-empty result produced by a symbolizer in the chain,
          or None if none of them produced one.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                # Skip missing (None) entries.
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add @symbolizer at the end of the chain."""
        self.symbolizer_list.append(symbolizer)
| 202 | + |
| 203 | + |
def BreakpadSymbolizerFactory(binary):
    """Create a BreakpadSymbolizer for @binary if a symbol file exists.

    The symbol file name is @binary plus the value of the BREAKPAD_SUFFIX
    environment variable.

    Returns:
      a BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or
      the symbol file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
| 211 | + |
| 212 | + |
def SystemSymbolizerFactory(system, addr, binary):
    """Create the platform's native symbolizer.

    Returns:
      an Addr2LineSymbolizer on 'Linux', a DarwinSymbolizer on 'Darwin',
      and None for any other @system value.
    """
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
| 218 | + |
| 219 | + |
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file.

        Args:
          filename: path of the Breakpad symbol file. Its first line is the
            MODULE record; the remaining lines are parsed by parse_lines().
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # open() works on both Python 2 and 3 (file() is Python 2 only), and
        # the with-block closes the handle.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []         # File names, indexed by FILE record number.
        self.symbols = {}       # Function/public symbol address -> name.
        self.address_list = []  # Sorted start addresses of line records.
        self.addresses = {}     # Line record address -> (sym, size, line, file).
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index FILE, PUBLIC, FUNC and line records from @lines."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK', 'INFO'):
                # Records that carry no symbol/line information we use.
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # Keep an earlier name (e.g. from PUBLIC) if there is one.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering @addr.

        Returns:
          (symbol name, file name, line number), or None if no line record
          covers @addr.
        """
        # Index of the last record starting at or before addr, plus one.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            # addr lies past the end of the closest preceding record.
            return None
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          a one-element list '<addr> in <function> <file>:<line>', or None
          if @binary is not this symbol file's module or the offset is not
          covered by a line record.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        if DEBUG:
            # Diagnostic only; printing unconditionally would inject a list
            # repr into the symbolized output.
            print(result)
        return result
| 292 | + |
| 293 | + |
class SymbolizationLoop(object):
    """Reads a log from stdin and prints it with stack frames symbolized."""

    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    # Groups: frame prefix, frame number, address, binary, offset.
    _STACK_TRACE_LINE_RE = re.compile(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

    def __init__(self, binary_name_filter=None):
        """Set up the loop.

        Args:
          binary_name_filter: optional callable mapping the binary name found
            in the log to the name that should be symbolized.
        Raises:
          Exception: if the host system is neither Linux nor Darwin.
        """
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        self.system = os.uname()[0]
        if self.system not in ('Linux', 'Darwin'):
            raise Exception('Unknown system')
        self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        # State used while printing; process_stdin() resets frame_no.
        self.frame_no = 0
        self.current_line = ''

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame using the per-binary symbolizer chain.

        Returns:
          list of symbolized frame strings for this address.
        """
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the frames, or the unmodified current line if there are none.

        Each printed frame is numbered with self.frame_no, which is then
        incremented.
        """
        if not symbolized_lines:
            print(self.current_line)
            return
        for symbolized_frame in symbolized_lines:
            print('    #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
            self.frame_no += 1

    def process_stdin(self):
        """Copy stdin to stdout, replacing stack trace lines as they arrive."""
        self.frame_no = 0

        if sys.version_info[0] == 2:
            sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
        else:
            # Unbuffered output is not supported in Python 3
            sys.stdout = os.fdopen(sys.stdout.fileno(), 'w')

        while True:
            # readline() rather than iterating the file object, so each line
            # is handled as soon as it is available.
            line = sys.stdin.readline()
            if not line:
                break
            self.current_line = line.rstrip()
            match = self._STACK_TRACE_LINE_RE.match(line)
            if not match:
                print(self.current_line)
                continue
            if DEBUG:
                print(line)
            _, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == '0':
                # Assume that frame #0 is the first frame of new stack trace.
                self.frame_no = 0
            original_binary = binary
            if self.binary_name_filter:
                binary = self.binary_name_filter(binary)
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line and original_binary != binary:
                # The filtered name gave nothing: retry with the name that
                # actually appeared in the log (not the filtered one again).
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
            self.print_symbolized_lines(symbolized_line)
| 364 | + |
| 365 | + |
if __name__ == '__main__':
    # Script entry point: symbolize the log arriving on stdin.
    SymbolizationLoop().process_stdin()
0 commit comments