Skip to content

Commit 5dba078

Browse files
committed
[lldb/crashlog] Load inlined symbol into interactive crashlog
Sometimes, crash reports come with inlined symbols. These provide the exact stack trace from the user's binary. However, when investigating a crash, it's very likely that the images related to the crashed thread are not available on the debugging user's system, or that their versions don't match. This causes interactive crashlog to show a degraded backtrace in lldb. This patch addresses that issue by parsing the inlined symbols from the crash report and loading them into lldb's target. This patch is a follow-up to 27f27d1, focusing on loading inlined symbols from legacy (non-JSON) crash reports. To do so, it updates the stack frame regular expression to make the capture groups more granular, so that the symbol name, the offset and the source location (if available) can be extracted, while also making the expression more maintainable. Now, when parsing the crash report, we build a data structure containing all the symbol information for each stack frame. Then, after launching the scripted process for interactive mode, we write a JSON symbol file for each module, containing only the symbols that belong to that module. Finally, we load the JSON symbol file into lldb before showing the user the process status and backtrace. rdar://97345586 Differential Revision: https://reviews.llvm.org/D146765 Signed-off-by: Med Ismail Bennani <[email protected]>
1 parent cb8613a commit 5dba078

File tree

4 files changed

+337
-61
lines changed

4 files changed

+337
-61
lines changed

lldb/examples/python/crashlog.py

Lines changed: 154 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,8 @@ def __init__(self, debugger, path, verbose):
498498
self.path = os.path.expanduser(path)
499499
self.verbose = verbose
500500
self.crashlog = CrashLog(debugger, self.path, self.verbose)
501+
# List of DarwinImages sorted by their index.
502+
self.images = list()
501503

502504
@abc.abstractmethod
503505
def parse(self):
@@ -525,8 +527,6 @@ def parse_json(buffer):
525527

526528
def __init__(self, debugger, path, verbose):
527529
super().__init__(debugger, path, verbose)
528-
# List of DarwinImages sorted by their index.
529-
self.images = list()
530530

531531
def parse(self):
532532
try:
@@ -670,13 +670,44 @@ def parse_asi_backtrace(self, thread, bt):
670670
print("error: can't parse application specific backtrace.")
671671
return False
672672

673-
(frame_id, frame_img_name, frame_addr, frame_ofs) = frame_match.groups()
673+
frame_id = frame_img_name = frame_addr = frame_symbol = frame_offset = frame_file = frame_line = frame_column = None
674+
675+
if len(frame_match.groups()) == 3:
676+
# Get the image UUID from the frame image name.
677+
(frame_id, frame_img_name, frame_addr) = frame_match.groups()
678+
elif len(frame_match.groups()) == 5:
679+
(frame_id, frame_img_name, frame_addr,
680+
frame_symbol, frame_offset) = frame_match.groups()
681+
elif len(frame_match.groups()) == 7:
682+
(frame_id, frame_img_name, frame_addr,
683+
frame_symbol, frame_offset,
684+
frame_file, frame_line) = frame_match.groups()
685+
elif len(frame_match.groups()) == 8:
686+
(frame_id, frame_img_name, frame_addr,
687+
frame_symbol, frame_offset,
688+
frame_file, frame_line, frame_column) = frame_match.groups()
674689

675690
thread.add_ident(frame_img_name)
676691
if frame_img_name not in self.crashlog.idents:
677692
self.crashlog.idents.append(frame_img_name)
693+
694+
description = ""
695+
if frame_img_name and frame_addr and frame_symbol:
696+
description = frame_symbol
697+
frame_offset_value = 0
698+
if frame_offset:
699+
description += " + " + frame_offset
700+
frame_offset_value = int(frame_offset, 0)
701+
for image in self.images:
702+
if image.identifier == frame_img_name:
703+
image.symbols[frame_symbol] = {
704+
"name": frame_symbol,
705+
"type": "code",
706+
"address": int(frame_addr, 0) - frame_offset_value,
707+
}
708+
678709
thread.frames.append(
679-
self.crashlog.Frame(int(frame_id), int(frame_addr, 0), frame_ofs)
710+
self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description)
680711
)
681712

682713
return True
@@ -720,36 +751,59 @@ class CrashLogParseMode:
720751

721752

722753
class TextCrashLogParser(CrashLogParser):
723-
parent_process_regex = re.compile(r"^Parent Process:\s*(.*)\[(\d+)\]")
724-
thread_state_regex = re.compile(r"^Thread \d+ crashed with")
725-
thread_instrs_regex = re.compile(r"^Thread \d+ instruction stream")
726-
thread_regex = re.compile(r"^Thread (\d+).*:")
727-
app_backtrace_regex = re.compile(r"^Application Specific Backtrace (\d+).*:")
728-
version = r"\(.+\)|(?:arm|x86_)[0-9a-z]+"
729-
frame_regex = re.compile(
730-
r"^(\d+)\s+" # id
731-
r"(.+?)\s+" # img_name
732-
r"(?:" + version + r"\s+)?" # img_version
733-
r"(0x[0-9a-fA-F]{4,})" # addr (4 chars or more)
734-
r"(?: +(.*))?" # offs
735-
)
736-
null_frame_regex = re.compile(r"^\d+\s+\?\?\?\s+0{4,} +")
737-
image_regex_uuid = re.compile(
738-
r"(0x[0-9a-fA-F]+)" # img_lo
739-
r"\s+-\s+" # -
740-
r"(0x[0-9a-fA-F]+)\s+" # img_hi
741-
r"[+]?(.+?)\s+" # img_name
742-
r"(?:(" + version + r")\s+)?" # img_version
743-
r"(?:<([-0-9a-fA-F]+)>\s+)?" # img_uuid
744-
r"(\?+|/.*)" # img_path
745-
)
746-
exception_type_regex = re.compile(
747-
r"^Exception Type:\s+(EXC_[A-Z_]+)(?:\s+\((.*)\))?"
748-
)
749-
exception_codes_regex = re.compile(
750-
r"^Exception Codes:\s+(0x[0-9a-fA-F]+),\s*(0x[0-9a-fA-F]+)"
751-
)
752-
exception_extra_regex = re.compile(r"^Exception\s+.*:\s+(.*)")
754+
parent_process_regex = re.compile(r'^Parent Process:\s*(.*)\[(\d+)\]')
755+
thread_state_regex = re.compile(r'^Thread \d+ crashed with')
756+
thread_instrs_regex = re.compile(r'^Thread \d+ instruction stream')
757+
thread_regex = re.compile(r'^Thread (\d+).*:')
758+
app_backtrace_regex = re.compile(r'^Application Specific Backtrace (\d+).*:')
759+
760+
class VersionRegex:
    """Holds the regex fragment for an image version or architecture name.

    The fragment is a bare alternation between a parenthesized version
    (``\\(.+\\)``) and an ``arm``/``x86_`` prefixed architecture name.
    """

    # NOTE: this is an un-grouped alternation; anything concatenated after it
    # only binds to the second alternative unless the caller adds a group.
    version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'


class FrameRegex(VersionRegex):
    """Builds the regular expression used to parse a stack frame line."""

    @classmethod
    def get(cls):
        """Return the compiled stack frame regular expression.

        The pattern always exposes 8 capture groups, in this order: frame
        index, image name, address, then the named groups ``symbol``,
        ``symbol_offset``, ``file_name``, ``line_number`` and ``column_num``.
        Groups that did not take part in the match are ``None``.
        """
        index = r'^(\d+)\s+'
        img_name = r'(.+?)\s+'
        # NOTE(review): because `|` has the lowest precedence, the trailing
        # `\s+` only applies to the architecture alternative, not to the
        # parenthesized version. Kept as-is to preserve behavior - confirm
        # whether that is intended.
        version = r'(?:' + super().version + r'\s+)?'
        address = r'(0x[0-9a-fA-F]{4,})'  # 4 digits or more

        # Raw string: the fragment contains `\+`, `\d` and `\(`, which are
        # invalid escapes in a regular string literal. It is compiled with
        # re.VERBOSE, so layout whitespace is ignored and literal spaces are
        # spelled `[ ]`.
        symbol = r"""
                    (?:
                        [ ]+
                        (?P<symbol>.+)
                        (?:
                            [ ]\+[ ]
                            (?P<symbol_offset>\d+)
                        )
                        (?:
                            [ ]\(
                            (?P<file_name>[^:]+):(?P<line_number>\d+)
                            (?:
                                :(?P<column_num>\d+)
                            )?
                        )?
                    )?
                 """

        return re.compile(index + img_name + version + address + symbol,
                          flags=re.VERBOSE)
791+
792+
frame_regex = FrameRegex.get()
793+
null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +')
794+
image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)' # img_lo
795+
r'\s+-\s+' # -
796+
r'(0x[0-9a-fA-F]+)\s+' # img_hi
797+
r'[+]?(.+?)\s+' # img_name
798+
r'(?:(' +
799+
VersionRegex.version + # img_version
800+
r')\s+)?'
801+
r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid
802+
r'(\?+|/.*)' # img_path
803+
)
804+
exception_type_regex = re.compile(r'^Exception Type:\s+(EXC_[A-Z_]+)(?:\s+\((.*)\))?')
805+
exception_codes_regex = re.compile(r'^Exception Codes:\s+(0x[0-9a-fA-F]+),\s*(0x[0-9a-fA-F]+)')
806+
exception_extra_regex = re.compile(r'^Exception\s+.*:\s+(.*)')
753807

754808
def __init__(self, debugger, path, verbose):
755809
super().__init__(debugger, path, verbose)
@@ -764,6 +818,7 @@ def __init__(self, debugger, path, verbose):
764818
CrashLogParseMode.SYSTEM: self.parse_system,
765819
CrashLogParseMode.INSTRS: self.parse_instructions,
766820
}
821+
self.symbols = {}
767822

768823
def parse(self):
769824
with open(self.path, "r") as f:
@@ -927,38 +982,76 @@ def parse_thread(self, line):
927982
print('warning: thread parser ignored null-frame: "%s"' % line)
928983
return
929984
frame_match = self.frame_regex.search(line)
930-
if frame_match:
931-
(frame_id, frame_img_name, frame_addr, frame_ofs) = frame_match.groups()
932-
ident = frame_img_name
933-
self.thread.add_ident(ident)
934-
if ident not in self.crashlog.idents:
935-
self.crashlog.idents.append(ident)
936-
self.thread.frames.append(
937-
self.crashlog.Frame(int(frame_id), int(frame_addr, 0), frame_ofs)
938-
)
939-
else:
985+
if not frame_match:
940986
print('error: frame regex failed for line: "%s"' % line)
987+
return
988+
989+
frame_id = frame_img_name = frame_addr = frame_symbol = frame_offset = frame_file = frame_line = frame_column = None
990+
991+
if len(frame_match.groups()) == 3:
992+
# Get the image UUID from the frame image name.
993+
(frame_id, frame_img_name, frame_addr) = frame_match.groups()
994+
elif len(frame_match.groups()) == 5:
995+
(frame_id, frame_img_name, frame_addr,
996+
frame_symbol, frame_offset) = frame_match.groups()
997+
elif len(frame_match.groups()) == 7:
998+
(frame_id, frame_img_name, frame_addr,
999+
frame_symbol, frame_offset,
1000+
frame_file, frame_line) = frame_match.groups()
1001+
elif len(frame_match.groups()) == 8:
1002+
(frame_id, frame_img_name, frame_addr,
1003+
frame_symbol, frame_offset,
1004+
frame_file, frame_line, frame_column) = frame_match.groups()
1005+
1006+
self.thread.add_ident(frame_img_name)
1007+
if frame_img_name not in self.crashlog.idents:
1008+
self.crashlog.idents.append(frame_img_name)
1009+
1010+
description = ""
1011+
# Since images are parsed after threads, we need to build a
1012+
# map for every image with a list of all the symbols and addresses
1013+
if frame_img_name and frame_addr and frame_symbol:
1014+
description = frame_symbol
1015+
frame_offset_value = 0
1016+
if frame_offset:
1017+
description += " + " + frame_offset
1018+
frame_offset_value = int(frame_offset, 0)
1019+
if frame_img_name not in self.symbols:
1020+
self.symbols[frame_img_name] = list()
1021+
self.symbols[frame_img_name].append(
1022+
{
1023+
"name": frame_symbol,
1024+
"address": int(frame_addr, 0) - frame_offset_value,
1025+
}
1026+
)
1027+
1028+
self.thread.frames.append(
1029+
self.crashlog.Frame(int(frame_id), int(frame_addr, 0), description)
1030+
)
9411031

9421032
def parse_images(self, line):
9431033
image_match = self.image_regex_uuid.search(line)
9441034
if image_match:
945-
(
946-
img_lo,
947-
img_hi,
948-
img_name,
949-
img_version,
950-
img_uuid,
951-
img_path,
952-
) = image_match.groups()
953-
image = self.crashlog.DarwinImage(
954-
int(img_lo, 0),
955-
int(img_hi, 0),
956-
img_name.strip(),
957-
img_version.strip() if img_version else "",
958-
uuid.UUID(img_uuid),
959-
img_path,
960-
self.verbose,
961-
)
1035+
(img_lo, img_hi, img_name, img_version,
1036+
img_uuid, img_path) = image_match.groups()
1037+
1038+
image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
1039+
img_name.strip(),
1040+
img_version.strip()
1041+
if img_version else "",
1042+
uuid.UUID(img_uuid), img_path,
1043+
self.verbose)
1044+
unqualified_img_name = os.path.basename(img_path)
1045+
if unqualified_img_name in self.symbols:
1046+
for symbol in self.symbols[unqualified_img_name]:
1047+
image.symbols[symbol["name"]] = {
1048+
"name": symbol["name"],
1049+
"type": "code",
1050+
# NOTE: "address" is actually the symbol image offset
1051+
"address": symbol["address"] - int(img_lo, 0),
1052+
}
1053+
1054+
self.images.append(image)
9621055
self.crashlog.images.append(image)
9631056
else:
9641057
print("error: image regex failed for: %s" % line)

0 commit comments

Comments
 (0)