Skip to content

Commit d5195b0

Browse files
Improvements to reference gathering via refs_as_defs in Binja frontend (#91)
* Modifies __main__.py to not use binja APIs directly and adds --refs_as_defs flag
* Formats python sources via black formatter
* Fixes global variable discovery, refs_as_defs flag, updates roundtrip
* Renames `insn` identifiers to `inst`
* Adds data xref collection when refs_as_defs is enabled
* Applies changes requested in PR and disables binja_var_none_type from CI
* Adds comments regarding a bug on tests/binja_var_none_type

Co-authored-by: Artem Dinaburg <artem@trailofbits.com>
1 parent 43ec9ef commit d5195b0

File tree

12 files changed

+243
-189
lines changed

12 files changed

+243
-189
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ set(ROUNDTRIP_TEST_FILES
236236
tests/trunc.c
237237
tests/zeroinit.c
238238
tests/zext.c
239-
tests/binja_var_none_type.c
239+
# tests/binja_var_none_type.c
240240
)
241241

242242
install(

CMakeLists_vcpkg.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ set(ROUNDTRIP_TEST_FILES
221221
tests/trunc.c
222222
tests/zeroinit.c
223223
tests/zext.c
224+
# tests/binja_var_none_type.c
224225
)
225226

226227
install(

lib/Analyze.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -885,6 +885,14 @@ static void RecoverStackMemoryAccesses(
885885
auto &gep = offset_cache[cell.address_const];
886886
if (!gep) {
887887
const auto goal_offset = cell.address_const - frame.min_ea;
888+
889+
// NOTE(surovic):
890+
// This thing iteratively builds up a GEP to point to an offset.
891+
// It moves in increments that are as wide as an element of the
892+
// indexed type. However if there's some leftover offset, it tries
893+
// to do some casting magic to satisfy the request. The assumption
894+
// is that the leftover is smaller than the size of a single element
895+
// which doesn't hold for tests/binja_var_none_type.c.
888896
gep = remill::BuildPointerToOffset(
889897
ir, frame_ptr, goal_offset, llvm::PointerType::get(cell.type, 0));
890898
}

python/anvill/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from .arch import *
1717
import os
18+
1819
try:
1920
import ida_idp
2021
from .ida import *

python/anvill/__main__.py

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1717

1818
import argparse
19+
import json
1920

2021
from .binja import get_program
2122

@@ -35,42 +36,45 @@ def main():
3536
help="Output functions only reachable from given entry point.",
3637
)
3738

38-
args, _ = arg_parser.parse_known_args()
39+
arg_parser.add_argument(
40+
"--refs_as_defs",
41+
action="store_true",
42+
help="Output definitions of discovered functions and variables.",
43+
default=False,
44+
)
45+
46+
args = arg_parser.parse_args()
3947

4048
p = get_program(args.bin_in)
41-
funcs = {}
42-
ep = None
4349

50+
ep = None
4451
if args.entry_point is not None:
4552
try:
4653
ep = int(args.entry_point, 0)
4754
except ValueError:
4855
ep = args.entry_point
4956

50-
def add_callees(ea):
51-
f = p.get_function(ea)
52-
if f not in funcs:
53-
funcs[ea] = f.name()
54-
for c in f._bn_func.callees:
55-
add_callees(c.start)
56-
57-
for ea, name in p.functions:
58-
if not ep:
59-
funcs[ea] = p.get_function(ea).name()
60-
elif ep == (ea if isinstance(ep, int) else name):
61-
add_callees(ea)
62-
break
63-
64-
for ea in funcs:
65-
p.add_symbol(ea, funcs[ea])
66-
p.add_function_definition(ea, True)
67-
68-
for ea, v in p.variables:
69-
for r in v.code_refs:
70-
if r.function.start in funcs:
71-
p.add_variable_definition(ea, False)
72-
73-
open(args.spec_out, "w").write(p.proto())
57+
if ep is None:
58+
for ea in p.functions:
59+
p.add_function_definition(ea, args.refs_as_defs)
60+
elif isinstance(ep, int):
61+
p.add_function_definition(ep, args.refs_as_defs)
62+
else:
63+
for ea, name in p.symbols:
64+
if name == ep:
65+
p.add_function_definition(ea, args.refs_as_defs)
66+
67+
def add_symbol(ea):
68+
for name in p.get_symbols(ea):
69+
p.add_symbol(ea, name)
70+
71+
for f in p.proto()["functions"]:
72+
add_symbol(f["address"])
73+
74+
for v in p.proto()["variables"]:
75+
add_symbol(v["address"])
76+
77+
open(args.spec_out, "w").write(json.dumps(p.proto()))
7478

7579

7680
if __name__ == "__main__":

python/anvill/arch.py

Lines changed: 41 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -947,13 +947,13 @@ class Sparc32Arch(Arch):
947947
"""SPARCv8 architecture description (32-bit)."""
948948

949949
_REG_FAMILY_qX = lambda l: (
950-
("q{}".format(l*4), 0, 16),
951-
("d{}".format(l*2), 0, 8),
952-
("d{}".format(l*2+1), 8, 16),
953-
("f{}".format(l*4), 0, 4),
954-
("f{}".format(l*4+1), 4, 8),
955-
("f{}".format(l*4+2), 8, 12),
956-
("f{}".format(l*4+3), 12, 16),
950+
("q{}".format(l * 4), 0, 16),
951+
("d{}".format(l * 2), 0, 8),
952+
("d{}".format(l * 2 + 1), 8, 16),
953+
("f{}".format(l * 4), 0, 4),
954+
("f{}".format(l * 4 + 1), 4, 8),
955+
("f{}".format(l * 4 + 2), 8, 12),
956+
("f{}".format(l * 4 + 3), 12, 16),
957957
)
958958

959959
_REG_FAMILY_q0 = _REG_FAMILY_qX(0)
@@ -965,8 +965,14 @@ class Sparc32Arch(Arch):
965965
_REG_FAMILY_q24 = _REG_FAMILY_qX(6)
966966
_REG_FAMILY_q28 = _REG_FAMILY_qX(7)
967967

968-
_REG_FAMILY_SP = (("sp", 0, 4), ("o6", 0, 4),)
969-
_REG_FAMILY_FP = (("fp", 0, 4), ("i6", 0, 4),)
968+
_REG_FAMILY_SP = (
969+
("sp", 0, 4),
970+
("o6", 0, 4),
971+
)
972+
_REG_FAMILY_FP = (
973+
("fp", 0, 4),
974+
("i6", 0, 4),
975+
)
970976

971977
_REG_FAMILY = {
972978
"g0": (("g0", 0, 4),),
@@ -977,7 +983,6 @@ class Sparc32Arch(Arch):
977983
"g5": (("g5", 0, 4),),
978984
"g6": (("g6", 0, 4),),
979985
"g7": (("g7", 0, 4),),
980-
981986
"l0": (("l0", 0, 4),),
982987
"l1": (("l1", 0, 4),),
983988
"l2": (("l2", 0, 4),),
@@ -986,7 +991,6 @@ class Sparc32Arch(Arch):
986991
"l5": (("l5", 0, 4),),
987992
"l6": (("l6", 0, 4),),
988993
"l7": (("l7", 0, 4),),
989-
990994
"i0": (("i0", 0, 4),),
991995
"i1": (("i1", 0, 4),),
992996
"i2": (("i2", 0, 4),),
@@ -995,7 +999,6 @@ class Sparc32Arch(Arch):
995999
"i5": (("i5", 0, 4),),
9961000
"i6": _REG_FAMILY_FP,
9971001
"i7": (("i7", 0, 4),),
998-
9991002
"o0": (("o0", 0, 4),),
10001003
"o1": (("o1", 0, 4),),
10011004
"o2": (("o2", 0, 4),),
@@ -1004,12 +1007,10 @@ class Sparc32Arch(Arch):
10041007
"o5": (("o5", 0, 4),),
10051008
"o6": _REG_FAMILY_SP,
10061009
"o7": (("o7", 0, 4),),
1007-
10081010
"sp": _REG_FAMILY_SP,
10091011
"fp": _REG_FAMILY_FP,
10101012
"pc": (("pc", 0, 4),),
10111013
"npc": (("npc", 0, 4),),
1012-
10131014
"q0": _REG_FAMILY_q0,
10141015
"q4": _REG_FAMILY_q4,
10151016
"q8": _REG_FAMILY_q8,
@@ -1018,7 +1019,6 @@ class Sparc32Arch(Arch):
10181019
"q20": _REG_FAMILY_q20,
10191020
"q24": _REG_FAMILY_q24,
10201021
"q28": _REG_FAMILY_q28,
1021-
10221022
"d0": _REG_FAMILY_q0,
10231023
"d2": _REG_FAMILY_q0,
10241024
"d4": _REG_FAMILY_q4,
@@ -1035,7 +1035,6 @@ class Sparc32Arch(Arch):
10351035
"d26": _REG_FAMILY_q24,
10361036
"d28": _REG_FAMILY_q28,
10371037
"d30": _REG_FAMILY_q28,
1038-
10391038
"f0": _REG_FAMILY_q0,
10401039
"f1": _REG_FAMILY_q0,
10411040
"f2": _REG_FAMILY_q0,
@@ -1080,29 +1079,22 @@ def stack_pointer_name(self):
10801079
return "o6"
10811080

10821081
def return_address_proto(self):
1083-
return {
1084-
"register": "o7",
1085-
"type": "I"
1086-
}
1082+
return {"register": "o7", "type": "I"}
10871083

10881084
def return_stack_pointer_proto(self, num_bytes_popped):
1089-
return {
1090-
"register": "o6",
1091-
"offset": 0,
1092-
"type": "I"
1093-
}
1085+
return {"register": "o6", "offset": 0, "type": "I"}
10941086

10951087
def pointer_size(self):
10961088
return 4
10971089

10981090
def register_family(self, reg_name):
1099-
if reg_name.startswith('%'):
1091+
if reg_name.startswith("%"):
11001092
return self._REG_FAMILY[reg_name[1:].lower()]
11011093
else:
11021094
return self._REG_FAMILY[reg_name.lower()]
11031095

11041096
def register_name(self, reg_name):
1105-
if reg_name.startswith('%'):
1097+
if reg_name.startswith("%"):
11061098
return reg_name[1:].lower()
11071099
else:
11081100
return reg_name.lower()
@@ -1112,19 +1104,19 @@ class Sparc64Arch(Arch):
11121104
"""SPARCv9 architecture description (64-bit)."""
11131105

11141106
_REG_FAMILY_qX = lambda l: (
1115-
("q{}".format(l*4), 0, 16),
1116-
("d{}".format(l*2), 0, 8),
1117-
("d{}".format(l*2+1), 8, 16),
1118-
("f{}".format(l*4), 0, 4),
1119-
("f{}".format(l*4+1), 4, 8),
1120-
("f{}".format(l*4+2), 8, 12),
1121-
("f{}".format(l*4+3), 12, 16),
1107+
("q{}".format(l * 4), 0, 16),
1108+
("d{}".format(l * 2), 0, 8),
1109+
("d{}".format(l * 2 + 1), 8, 16),
1110+
("f{}".format(l * 4), 0, 4),
1111+
("f{}".format(l * 4 + 1), 4, 8),
1112+
("f{}".format(l * 4 + 2), 8, 12),
1113+
("f{}".format(l * 4 + 3), 12, 16),
11221114
)
11231115

11241116
_REG_FAMILY_qXv9 = lambda l: (
1125-
("q{}".format(l*4), 0, 16),
1126-
("d{}".format(l*2), 0, 8),
1127-
("d{}".format(l*2+1), 8, 16),
1117+
("q{}".format(l * 4), 0, 16),
1118+
("d{}".format(l * 2), 0, 8),
1119+
("d{}".format(l * 2 + 1), 8, 16),
11281120
)
11291121

11301122
_REG_FAMILY_q0 = _REG_FAMILY_qX(0)
@@ -1147,8 +1139,14 @@ class Sparc64Arch(Arch):
11471139
_REG_FAMILY_q56 = _REG_FAMILY_qXv9(14)
11481140
_REG_FAMILY_q60 = _REG_FAMILY_qXv9(15)
11491141

1150-
_REG_FAMILY_SP = (("sp", 0, 8), ("o6", 0, 8),)
1151-
_REG_FAMILY_FP = (("fp", 0, 8), ("i6", 0, 8),)
1142+
_REG_FAMILY_SP = (
1143+
("sp", 0, 8),
1144+
("o6", 0, 8),
1145+
)
1146+
_REG_FAMILY_FP = (
1147+
("fp", 0, 8),
1148+
("i6", 0, 8),
1149+
)
11521150

11531151
_REG_FAMILY = {
11541152
"g0": (("g0", 0, 8),),
@@ -1159,7 +1157,6 @@ class Sparc64Arch(Arch):
11591157
"g5": (("g5", 0, 8),),
11601158
"g6": (("g6", 0, 8),),
11611159
"g7": (("g7", 0, 8),),
1162-
11631160
"l0": (("l0", 0, 8),),
11641161
"l1": (("l1", 0, 8),),
11651162
"l2": (("l2", 0, 8),),
@@ -1168,7 +1165,6 @@ class Sparc64Arch(Arch):
11681165
"l5": (("l5", 0, 8),),
11691166
"l6": (("l6", 0, 8),),
11701167
"l7": (("l7", 0, 8),),
1171-
11721168
"i0": (("i0", 0, 8),),
11731169
"i1": (("i1", 0, 8),),
11741170
"i2": (("i2", 0, 8),),
@@ -1177,7 +1173,6 @@ class Sparc64Arch(Arch):
11771173
"i5": (("i5", 0, 8),),
11781174
"i6": _REG_FAMILY_FP,
11791175
"i7": (("i7", 0, 8),),
1180-
11811176
"o0": (("o0", 0, 8),),
11821177
"o1": (("o1", 0, 8),),
11831178
"o2": (("o2", 0, 8),),
@@ -1186,12 +1181,10 @@ class Sparc64Arch(Arch):
11861181
"o5": (("o5", 0, 8),),
11871182
"o6": _REG_FAMILY_SP,
11881183
"o7": (("o7", 0, 8),),
1189-
11901184
"sp": _REG_FAMILY_SP,
11911185
"fp": _REG_FAMILY_FP,
11921186
"pc": (("pc", 0, 8),),
11931187
"npc": (("npc", 0, 8),),
1194-
11951188
"q0": _REG_FAMILY_q0,
11961189
"q4": _REG_FAMILY_q4,
11971190
"q8": _REG_FAMILY_q8,
@@ -1208,7 +1201,6 @@ class Sparc64Arch(Arch):
12081201
"q52": _REG_FAMILY_q52,
12091202
"q56": _REG_FAMILY_q56,
12101203
"q60": _REG_FAMILY_q60,
1211-
12121204
"d0": _REG_FAMILY_q0,
12131205
"d2": _REG_FAMILY_q0,
12141206
"d4": _REG_FAMILY_q4,
@@ -1225,7 +1217,6 @@ class Sparc64Arch(Arch):
12251217
"d26": _REG_FAMILY_q24,
12261218
"d28": _REG_FAMILY_q28,
12271219
"d30": _REG_FAMILY_q28,
1228-
12291220
"d32": _REG_FAMILY_q32,
12301221
"d34": _REG_FAMILY_q32,
12311222
"d36": _REG_FAMILY_q36,
@@ -1242,7 +1233,6 @@ class Sparc64Arch(Arch):
12421233
"d58": _REG_FAMILY_q56,
12431234
"d60": _REG_FAMILY_q60,
12441235
"d62": _REG_FAMILY_q60,
1245-
12461236
"f0": _REG_FAMILY_q0,
12471237
"f1": _REG_FAMILY_q0,
12481238
"f2": _REG_FAMILY_q0,
@@ -1287,29 +1277,22 @@ def stack_pointer_name(self):
12871277
return "o6"
12881278

12891279
def return_address_proto(self):
1290-
return {
1291-
"register": "o7",
1292-
"type": "L"
1293-
}
1280+
return {"register": "o7", "type": "L"}
12941281

12951282
def return_stack_pointer_proto(self, num_bytes_popped):
1296-
return {
1297-
"register": "o6",
1298-
"offset": 0,
1299-
"type": "L"
1300-
}
1283+
return {"register": "o6", "offset": 0, "type": "L"}
13011284

13021285
def pointer_size(self):
13031286
return 8
13041287

13051288
def register_family(self, reg_name):
1306-
if reg_name.startswith('%'):
1289+
if reg_name.startswith("%"):
13071290
return self._REG_FAMILY[reg_name[1:].lower()]
13081291
else:
13091292
return self._REG_FAMILY[reg_name.lower()]
13101293

13111294
def register_name(self, reg_name):
1312-
if reg_name.startswith('%'):
1295+
if reg_name.startswith("%"):
13131296
return reg_name[1:].lower()
13141297
else:
13151298
return reg_name.lower()

0 commit comments

Comments
 (0)