Skip to content

Commit de4b989

Browse files
committed
[BOLT] Add unit tests for negate_ra_state cfi handling
- also add match_dwarf.py, a tool used by the unit tests.
1 parent e81445c commit de4b989

File tree

5 files changed

+233
-21
lines changed

5 files changed

+233
-21
lines changed

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,14 +2787,8 @@ struct CFISnapshot {
27872787
case MCCFIInstruction::OpLLVMDefAspaceCfa:
27882788
case MCCFIInstruction::OpLabel:
27892789
case MCCFIInstruction::OpValOffset:
2790-
llvm_unreachable("unsupported CFI opcode");
2791-
break;
27922790
case MCCFIInstruction::OpNegateRAState:
2793-
if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
2794-
llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
2795-
"as produced by '-mbranch-protection=pac-ret') are "
2796-
"currently not supported by BOLT.");
2797-
}
2791+
llvm_unreachable("unsupported CFI opcode");
27982792
break;
27992793
case MCCFIInstruction::OpRememberState:
28002794
case MCCFIInstruction::OpRestoreState:
@@ -2934,15 +2928,9 @@ struct CFISnapshotDiff : public CFISnapshot {
29342928
case MCCFIInstruction::OpLLVMDefAspaceCfa:
29352929
case MCCFIInstruction::OpLabel:
29362930
case MCCFIInstruction::OpValOffset:
2931+
case MCCFIInstruction::OpNegateRAState:
29372932
llvm_unreachable("unsupported CFI opcode");
29382933
return false;
2939-
case MCCFIInstruction::OpNegateRAState:
2940-
if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
2941-
llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
2942-
"as produced by '-mbranch-protection=pac-ret') are "
2943-
"currently not supported by BOLT.");
2944-
}
2945-
break;
29462934
case MCCFIInstruction::OpRememberState:
29472935
case MCCFIInstruction::OpRestoreState:
29482936
case MCCFIInstruction::OpGnuArgsSize:
@@ -3091,14 +3079,8 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
30913079
case MCCFIInstruction::OpLLVMDefAspaceCfa:
30923080
case MCCFIInstruction::OpLabel:
30933081
case MCCFIInstruction::OpValOffset:
3094-
llvm_unreachable("unsupported CFI opcode");
3095-
break;
30963082
case MCCFIInstruction::OpNegateRAState:
3097-
if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) {
3098-
llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. "
3099-
"as produced by '-mbranch-protection=pac-ret') are "
3100-
"currently not supported by BOLT.");
3101-
}
3083+
llvm_unreachable("unsupported CFI opcode");
31023084
break;
31033085
case MCCFIInstruction::OpGnuArgsSize:
31043086
// do not affect CFI state
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
2+
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
3+
# RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s
4+
5+
# Check that BOLT reports foo as having inconsistent RA states.
6+
# CHECK: BOLT-INFO: inconsistent RAStates in function foo
7+
8+
# check that foo got Ignored, so it's not in the new .text section
9+
# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump
10+
# RUN: not grep "<foo>:" %t.exe.dump
11+
12+
13+
# How is this test incorrect?
14+
# There is an extra .cfi_negate_ra_state in foo.
15+
# Because of this, we will get to the autiasp (hint #29)
16+
# in a (seemingly) unsigned state. That is incorrect.
17+
.text
18+
.globl foo
19+
.p2align 2
20+
.type foo,@function
21+
foo:
22+
.cfi_startproc
23+
hint #25
24+
.cfi_negate_ra_state
25+
sub sp, sp, #16
26+
stp x29, x30, [sp, #16] // 16-byte Folded Spill
27+
.cfi_def_cfa_offset 16
28+
str w0, [sp, #12]
29+
ldr w8, [sp, #12]
30+
.cfi_negate_ra_state
31+
add w0, w8, #1
32+
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
33+
add sp, sp, #16
34+
hint #29
35+
.cfi_negate_ra_state
36+
ret
37+
.Lfunc_end1:
38+
.size foo, .Lfunc_end1-foo
39+
.cfi_endproc
40+
41+
.global _start
42+
.type _start, %function
43+
_start:
44+
b foo
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
2+
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
3+
4+
# RUN: llvm-objdump %t.exe -d > %t.exe.dump
5+
# RUN: llvm-objdump --dwarf=frames %t.exe > %t.exe.dump-dwarf
6+
# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt
7+
8+
# RUN: llvm-bolt %t.exe -o %t.exe.bolt
9+
10+
# RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump
11+
# RUN: llvm-objdump --dwarf=frames %t.exe.bolt > %t.exe.bolt.dump-dwarf
12+
# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt
13+
14+
# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt
15+
16+
.text
17+
.globl foo
18+
.p2align 2
19+
.type foo,@function
20+
foo:
21+
.cfi_startproc
22+
hint #25
23+
.cfi_negate_ra_state
24+
sub sp, sp, #16
25+
stp x29, x30, [sp, #16] // 16-byte Folded Spill
26+
.cfi_def_cfa_offset 16
27+
str w0, [sp, #12]
28+
ldr w8, [sp, #12]
29+
add w0, w8, #1
30+
ldp x29, x30, [sp, #16] // 16-byte Folded Reload
31+
add sp, sp, #16
32+
hint #29
33+
.cfi_negate_ra_state
34+
ret
35+
.Lfunc_end1:
36+
.size foo, .Lfunc_end1-foo
37+
.cfi_endproc
38+
39+
.global _start
40+
.type _start, %function
41+
_start:
42+
b foo

bolt/test/lit.cfg.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@
100100
config.substitutions.append(("%cxxflags", ""))
101101

102102
link_fdata_cmd = os.path.join(config.test_source_root, "link_fdata.py")
103+
match_dwarf_cmd = os.path.join(config.test_source_root, "match_dwarf.py")
103104

104105
tool_dirs = [config.llvm_tools_dir, config.test_source_root]
105106

@@ -143,6 +144,12 @@
143144
ToolSubst("llvm-readobj", unresolved="fatal"),
144145
ToolSubst("llvm-dwp", unresolved="fatal"),
145146
ToolSubst("split-file", unresolved="fatal"),
147+
ToolSubst(
148+
"match-dwarf",
149+
command=sys.executable,
150+
unresolved="fatal",
151+
extra_args=[match_dwarf_cmd],
152+
),
146153
]
147154
llvm_config.add_tool_substitutions(tools, tool_dirs)
148155

bolt/test/match_dwarf.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
#!/usr/bin/env python3
2+
3+
# This tool helps match DWARF dumps
4+
# (= the output from running llvm-objdump --dwarf=frames),
5+
# by address to function names (which are parsed from a normal objdump).
6+
# The script is used for checking if .cfi_negate_ra_state CFIs
7+
# are generated by BOLT the same way they are generated by LLVM.
8+
# The script is called twice in unittests: once with the objdumps of
9+
# the BOLT input binary, and once with the output binary from BOLT.
10+
# We output the offsets of .cfi_negate_ra_state instructions from the
11+
# function's start address to see that BOLT can generate them to the same
12+
# locations.
13+
# Because we check the location, this is only useful for testing without
14+
# optimization flags, so `llvm-bolt input.exe -o output.exe`
15+
16+
17+
import argparse
18+
import subprocess
19+
import sys
20+
import re
21+
22+
23+
class NameDwarfPair(object):
    """A function name paired with the text of its DWARF frame entry.

    ``body`` accumulates the dump lines that follow the entry header, up to
    (but not including) the first whitespace-only line.  After
    ``parse_negate_offsets()`` has run, ``negate_offsets`` holds the summed
    advance of every DW_CFA_AARCH64_negate_ra_state found in ``body``.
    """

    # Matches DW_CFA_advance_loc as well as its wider encodings
    # (DW_CFA_advance_loc1/2/4), so that larger deltas are not silently
    # dropped from the running location.
    # TODO: make sure this is not printed in hex
    _RE_ADVANCE_LOC = re.compile(r"DW_CFA_advance_loc[124]?: (\d+)")

    # Instances are mutable and define __eq__, so they stay unhashable.
    __hash__ = None

    def __init__(self, name, body):
        self.name = name
        self.body = body
        self.finalized = False
        # Defined up front so that comparing pairs that were never parsed
        # does not raise AttributeError.
        self.negate_offsets = []

    def append(self, body_line):
        """Add ``body_line`` to the body unless the entry is already closed.

        The first whitespace-only line closes the entry; that line and
        everything after it is ignored.
        """
        if body_line.isspace():
            self.finalized = True
        if not self.finalized:
            self.body += body_line

    def print(self):
        """Print the function name followed by the collected body."""
        print(self.name)
        print(self.body)

    def parse_negate_offsets(self):
        """
        Create a list of locations/offsets of the negate_ra_state CFIs in the
        dwarf entry. To find offsets for each, we match the DW_CFA_advance_loc
        entries, and sum up their values.

        The result is stored in ``self.negate_offsets``.
        """
        negate_offsets = []
        loc = 0

        for line in self.body.splitlines():
            match = self._RE_ADVANCE_LOC.search(line)
            if match:
                loc += int(match.group(1))
            if "DW_CFA_AARCH64_negate_ra_state" in line:
                negate_offsets.append(loc)

        self.negate_offsets = negate_offsets

    def __eq__(self, other):
        """Pairs are equal when both the name and the negate offsets match."""
        if not isinstance(other, NameDwarfPair):
            return NotImplemented
        return self.name == other.name and self.negate_offsets == other.negate_offsets
63+
64+
65+
def extract_function_addresses(objdump):
    """
    Build an address-to-name mapping from a disassembly dump file.

    Symbol header lines in the dump have the form:
        000123abc <foo>:
    Each such line contributes one entry: the hexadecimal address
    (000123abc) converted to an int is the key, the symbol name (foo) is
    the value. All other lines are ignored.
    """
    symbol_header = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$")
    names_by_address = {}
    with open(objdump, "r") as dump:
        for text in dump:
            found = symbol_header.match(text)
            if found is None:
                continue
            address, symbol = found.group(1, 2)
            names_by_address[int(address, 16)] = symbol

    return names_by_address
84+
85+
86+
def match_dwarf_to_name(dwarfdump, addr_name_dict):
    """
    Split a dwarf frame dump into per-function NameDwarfPairs.

    An entry header in the dump looks like this:
        000123 000456 000789 FDE cie=000000 pc=0123abc...0456def
    The header carries only the PC range, not the function name, so the
    start address (pc=0123abc) is looked up in addr_name_dict to obtain
    the name (None when the address is not in the dict).

    Every header starts a new NameDwarfPair with an empty body; the lines
    that follow are handed to the most recent pair. Lines appearing before
    the first header are dropped. Returns the list of pairs in file order.
    """
    fde_header = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)")
    pairs = []
    with open(dwarfdump, "r") as dump:
        for text in dump:
            header = fde_header.match(text)
            if header is not None:
                start_address = int(header.group(1), 16)
                pairs.append(NameDwarfPair(addr_name_dict.get(start_address), ""))
            elif pairs:
                pairs[-1].append(text)

    return pairs
112+
113+
114+
def main():
    """Print the negate_ra_state offsets of one function.

    Reads the objdump and dwarf dump files named on the command line,
    matches dwarf entries to function names, and prints the list of
    negate_ra_state offsets for the first entry whose name equals the
    requested function. If no entry matches, reports that on stderr and
    exits with status -1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("objdump", help="Objdump file")
    parser.add_argument(
        "dwarfdump", help="dwarf dump file created with 'llvm-objdump --dwarf=frames'"
    )
    parser.add_argument("function", help="Function to search CFIs in.")

    args = parser.parse_args()

    addr_name_dict = extract_function_addresses(args.objdump)
    functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict)

    for f in functions:
        if f.name == args.function:
            f.parse_negate_offsets()
            print(f.negate_offsets)
            break
    else:
        # Callers redirect stdout to a file, so report the failure on
        # stderr where it stays visible.
        print(f"{args.function} not found", file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to exist.
        sys.exit(-1)


# Only run when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)