Skip to content

Commit 4706289

Browse files
committed
chore: add build utils and third party
1 parent 0f1a405 commit 4706289

File tree

93 files changed

+46057
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

93 files changed

+46057
-0
lines changed

.gitattributes

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
*.so filter=lfs diff=lfs merge=lfs -text
2+
*.zip filter=lfs diff=lfs merge=lfs -text
3+
*.jar filter=lfs diff=lfs merge=lfs -text
4+
*.tar filter=lfs diff=lfs merge=lfs -text
5+
*.gz filter=lfs diff=lfs merge=lfs -text
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Note this file is merely a placeholder that contains no suppressions for now.
19+
# But it may become useful in the future.

build_support/asan_symbolize.py

Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
#!/usr/bin/env python
2+
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3+
#
4+
# The LLVM Compiler Infrastructure
5+
#
6+
# This file is distributed under the University of Illinois Open Source
7+
# License. See LICENSE.TXT for details.
8+
#
9+
#===------------------------------------------------------------------------===#
10+
import bisect
11+
import os
12+
import re
13+
import subprocess
14+
import sys
15+
16+
# Not referenced by the code visible in this file.
llvm_symbolizer = None
17+
# Cache of symbolizer chains keyed by binary path; populated by
# SymbolizationLoop.symbolize_address.
symbolizers = {}
18+
# Not referenced by the code visible in this file.
filetypes = {}
19+
# Not referenced by the code visible in this file.
vmaddrs = {}
20+
# When true, extra diagnostics are printed (commands launched, input sent to
# the symbolizers, raw lines being processed).
DEBUG = False
21+
22+
23+
# FIXME: merge the code that calls fix_filename().
24+
def fix_filename(file_name):
    """Shorten a source location reported by a symbolizer.

    Each command-line argument (sys.argv[1:]) is used as a regular
    expression; the text up to and including a match is removed from the
    location.  Afterwards, locations inside asan_*.cc files are collapsed to
    '_asan_rtl_' and the 'crtstuff.c:0' placeholder is rewritten to '???:0'.

    Args:
        file_name: location string such as '/src/foo.cc:12'.

    Returns:
        The rewritten location string.
    """
    for path_to_cut in sys.argv[1:]:
        # The argument is deliberately used as a regex, as before.
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    # The dots in the file extensions are literal, so they are escaped;
    # otherwise any character would be accepted in their place.
    file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
    return file_name
30+
31+
32+
class Symbolizer(object):
    """Common interface implemented by every symbolizer backend."""

    def __init__(self):
        """The base class keeps no state."""

    def symbolize(self, addr, binary, offset):
        """Describe the code location of one instruction.

        The base implementation resolves nothing; subclasses override it.

        Args:
            addr: virtual address of an instruction.
            binary: path to the executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.

        Returns:
            A list of strings, one per inlined frame, each describing a code
            location (function name, file name, line and column numbers).
            The base class always returns None.
        """
        return None
50+
51+
52+
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to a long-running llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        """Remember the executable path and try to start the process.

        Args:
            symbolizer_path: path or bare command name of llvm-symbolizer.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer.

        Returns:
            The subprocess.Popen object, or None when the executable cannot
            be launched.
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=false',
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Let Popen resolve the executable: an os.path.exists() check
            # rejects a bare 'llvm-symbolizer' that is only found via PATH.
            # Text mode lets str be written/read on Python 3 as well.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            A list with one '<addr> in <function> <file>' string per valid
            frame, or None when the process is unavailable, the exchange
            fails, or no valid frame was reported.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # Without a flush the request may sit in the stdin buffer while
            # we block waiting for the answer.
            self.pipe.stdin.flush()
            while True:
                # The answer is pairs of lines terminated by an empty line.
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = fix_filename(self.pipe.stdout.readline().rstrip())
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except (IOError, OSError, ValueError):
            # Best effort: a dead or closed pipe means "no answer".
            result = []
        return result or None
98+
99+
100+
def LLVMSymbolizerFactory(system):
    """Build an LLVMSymbolizer for the configured executable.

    The executable is taken from the LLVM_SYMBOLIZER_PATH environment
    variable; when that is unset or empty, 'llvm-symbolizer' is used on the
    assumption that it is in PATH.

    Args:
        system: accepted but not used by this function.

    Returns:
        A new LLVMSymbolizer instance.
    """
    return LLVMSymbolizer(
        os.getenv('LLVM_SYMBOLIZER_PATH') or 'llvm-symbolizer')
106+
107+
108+
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that queries an 'addr2line -f -e <binary>' process."""

    def __init__(self, binary):
        """Start addr2line for the given binary.

        Args:
            binary: path of the binary this instance can symbolize.
        """
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Launch addr2line and return the subprocess.Popen object."""
        cmd = ['addr2line', '-f', '-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text mode so that str can be written and read on Python 3 too.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            None when @binary is not the binary this instance was created
            for; otherwise a one-element list '<addr> in <function> <file>'.
            The function and file parts are empty if the exchange with
            addr2line fails.
        """
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            # Push the request out before blocking on the reply.
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except (IOError, OSError, ValueError):
            # Best effort: report the address with empty details.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]
135+
136+
137+
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that runs 'atos' once per query."""

    def __init__(self, addr, binary):
        """Record the binary and guess the architecture from @addr.

        Args:
            addr: an address string; its length selects the architecture.
            binary: path of the binary this instance can symbolize.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        self.arch = 'x86_64' if len(addr) > 10 else 'i386'
        self.vmaddr = None
        self.pipe = None

    def write_addr_to_pipe(self, offset):
        """Send @offset (a hex string) to atos as a normalized '0x...' line."""
        self.pipe.stdin.write('0x%x\n' % int(offset, 16))

    def open_atos(self):
        """Launch atos for this binary/arch and store the process in self.pipe."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        # Text mode so that str can be written and read on Python 3 too.
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            None when @binary is not the binary this instance was created
            for; otherwise a one-element list.  The element is
            '<addr> in <function> <file>' for a well-formed atos answer and
            '<addr> in <raw atos line>' otherwise.
        """
        if self.binary != binary:
            return None
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() flushes and closes stdin, drains stdout and stderr
        # and waits for the child, so no pipe or process is left behind.
        atos_output, _ = self.pipe.communicate()
        output_lines = atos_output.splitlines()
        atos_line = output_lines[0].rstrip() if output_lines else ''
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line: {0}'.format(atos_line))
        if not match:
            return ['%s in %s' % (addr, atos_line)]
        # Drop the parenthesized argument list from the function name.
        function_name = re.sub(r'\(.*?\)', '', match.group(1))
        file_name = fix_filename(match.group(3))
        return ['%s in %s %s' % (addr, function_name, file_name)]
182+
183+
184+
# Chain several symbolizers so that if one symbolizer fails, we fall back
185+
# to the next symbolizer in chain.
186+
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first answer."""

    def __init__(self, symbolizer_list):
        """Store @symbolizer_list; falsy entries are skipped when querying."""
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            The first truthy result produced by a symbolizer in the chain,
            or None when none of them produced one.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add @symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
202+
203+
204+
def BreakpadSymbolizerFactory(binary):
    """Create a BreakpadSymbolizer for @binary when a symbol file exists.

    The symbol file name is @binary followed by the value of the
    BREAKPAD_SUFFIX environment variable.

    Returns:
        A BreakpadSymbolizer, or None when the variable is unset/empty or
        the symbol file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    candidate = binary + suffix
    if not os.access(candidate, os.F_OK):
        return None
    return BreakpadSymbolizer(candidate)
211+
212+
213+
def SystemSymbolizerFactory(system, addr, binary):
    """Pick the platform's native symbolizer.

    Args:
        system: system name; 'Linux' and 'Darwin' are recognized.
        addr: address string, forwarded to DarwinSymbolizer.
        binary: path of the binary to symbolize.

    Returns:
        An Addr2LineSymbolizer on Linux, a DarwinSymbolizer on Darwin, and
        None for any other system name.
    """
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
218+
219+
220+
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a Breakpad text symbol file.

    The whole file is read and indexed when the object is constructed.
    """

    def __init__(self, filename):
        """Read @filename and index its records.

        Args:
            filename: path of the symbol file.  Its first line is the MODULE
                record, e.g.
                'MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t'.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # open() works on Python 2 and 3 (file() is Python 2 only); the
        # with-block closes the handle.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # file names, indexed by FILE record number
        self.symbols = {}        # function/public address -> symbol name
        self.address_list = []   # sorted start addresses of line records
        self.addresses = {}      # start address -> (symbol addr, size, line, file no)
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the records that follow the MODULE line.

        FILE, PUBLIC and FUNC records fill self.files / self.symbols, CFI
        and STACK records are ignored, and every other non-blank line is
        treated as a line record starting with a hex address.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # An already recorded name for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering @addr.

        Args:
            addr: integer address.

        Returns:
            A (symbol, filename, line_no) tuple, or None when no line record
            covers the address.
        """
        if addr in self.addresses:
            key = addr
        else:
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                # Every recorded start address is above @addr.
                return None
            # Closest record that starts below @addr.
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            A one-element list '<addr> in <function> <file>:<line>', or None
            when @binary is not the module described by the symbol file or
            the offset is not covered by any line record.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        if DEBUG:
            # Diagnostic only; unconditional printing would duplicate every
            # frame in the normal output.
            print(result)
        return result
292+
293+
294+
class SymbolizationLoop(object):
    """Copies stdin to stdout, replacing stack-trace frames with symbolized ones."""

    # Matches a frame line such as:
    #   #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    # Compiled once instead of on every input line.
    _STACK_TRACE_LINE_RE = re.compile(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')

    def __init__(self, binary_name_filter=None):
        """Set up the loop for the current platform.

        Args:
            binary_name_filter: optional callable mapping a binary name from
                the log to the name to symbolize.  Used by clients who may
                want to supply a different binary name, e.g. in Chrome
                several binaries may share a single .dSYM.

        Raises:
            Exception: when the system is neither Linux nor Darwin.
        """
        self.binary_name_filter = binary_name_filter
        # Initialised here so print_symbolized_lines() is usable even before
        # process_stdin() has run.
        self.frame_no = 0
        self.current_line = ''
        self.system = os.uname()[0]
        if self.system in ['Linux', 'Darwin']:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        else:
            raise Exception('Unknown system')

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one frame using the chain cached for @binary.

        Use the chain of symbolizers:
        Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        (fall back to next symbolizer if the previous one fails).

        Returns:
            The list of frame descriptions produced by the chain.
        """
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the frames, or the current raw line when there are none.

        Each printed frame is numbered with self.frame_no, which is then
        incremented.
        """
        if not symbolized_lines:
            print(self.current_line)
            return
        for symbolized_frame in symbolized_lines:
            print(' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip())
            self.frame_no += 1

    def process_stdin(self):
        """Read stdin line by line until EOF and print the symbolized log."""
        self.frame_no = 0

        if sys.version_info[0] == 2:
            sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)
        else:
            # Unbuffered text output is not supported in Python 3; line
            # buffering is the closest equivalent and keeps the output
            # flowing while input is still arriving.
            sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 1)

        # readline() (rather than iterating the file) avoids read-ahead, so
        # each line is handled as soon as it arrives.
        for line in iter(sys.stdin.readline, ''):
            self._process_line(line)

    def _process_line(self, line):
        """Print @line unchanged, or its symbolized frames if it is a frame line."""
        self.current_line = line.rstrip()
        match = self._STACK_TRACE_LINE_RE.match(line)
        if not match:
            print(self.current_line)
            return
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing: retry with the name that was
            # actually in the log (retrying the filtered name again would
            # repeat the identical query).
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        self.print_symbolized_lines(symbolized_line)
364+
365+
366+
if __name__ == '__main__':
    # Run as a filter: symbolize everything that arrives on stdin.
    SymbolizationLoop().process_stdin()

0 commit comments

Comments
 (0)