Skip to content

Commit 9630389

Browse files
committed
WIP: Python: Collect and pass mappings when copying MLIL functions
1 parent 4912268 commit 9630389

File tree

4 files changed

+157
-18
lines changed

4 files changed

+157
-18
lines changed

python/commonil.py

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
# IN THE SOFTWARE.
2020

2121
from dataclasses import dataclass
22-
from typing import Union
22+
from typing import Union, Optional
2323
from .flowgraph import FlowGraph, FlowGraphNode
2424
from .enums import BranchType
2525
from .interaction import show_graph_report
@@ -210,7 +210,6 @@ class AliasedVariableInstruction(VariableInstruction):
210210
pass
211211

212212

213-
@dataclass
214213
class ILSourceLocation:
215214
"""
216215
ILSourceLocation is used to indicate where expressions were defined during the lifting process
@@ -220,15 +219,52 @@ class ILSourceLocation:
220219
address: int
221220
source_operand: int
222221

222+
source_llil_instruction: Optional['lowlevelil.LowLevelILInstruction'] = None
223+
source_mlil_instruction: Optional['mediumlevelil.MediumLevelILInstruction'] = None
224+
source_hlil_instruction: Optional['highlevelil.HighLevelILInstruction'] = None
225+
il_direct: bool = True
226+
227+
def __init__(self, address: int, source_operand: int):
    """
    Create a location from an address and a source operand index.

    Only these two values are stored on the instance here; any other field keeps
    whatever default the class provides until it is assigned explicitly.

    :param address: address the expression was defined at
    :param source_operand: source operand index for the expression
    """
    self.address, self.source_operand = address, source_operand
230+
231+
def __repr__(self):
    """
    Render the location as ``<ILSourceLocation: ADDR, OPERAND[ (from LEVEL INSTR)]>``.

    The address is printed in hex without a prefix. If more than one source
    instruction is set, the last one checked (LLIL, then MLIL, then HLIL) is the
    one that ends up in the text.
    """
    origin = ""
    candidates = (
        ("LLIL", self.source_llil_instruction),
        ("MLIL", self.source_mlil_instruction),
        ("HLIL", self.source_hlil_instruction),
    )
    for level, source in candidates:
        # Deliberately no break: a later level overrides an earlier one
        if source is not None:
            origin = f" (from {level} {source})"
    return f"<ILSourceLocation: {self.address:x}, {self.source_operand}{origin}>"
240+
241+
def __hash__(self):
    """
    Hash on ``(address, source_operand)`` only.

    The source instruction references and ``il_direct`` do not take part in the hash.
    """
    identity = (self.address, self.source_operand)
    return hash(identity)
243+
244+
def __eq__(self, other):
    """
    Compare two locations by ``(address, source_operand)``.

    The source instruction references and ``il_direct`` are ignored, which keeps
    equality consistent with ``__hash__``.

    :param other: object to compare against
    :return: True/False for another ILSourceLocation, ``NotImplemented`` otherwise
    """
    if not isinstance(other, ILSourceLocation):
        # Let Python try the reflected comparison; ``==`` against an unrelated
        # type still ends up False, so callers see the same result as before.
        return NotImplemented
    return (self.address, self.source_operand) == (other.address, other.source_operand)
248+
223249
@classmethod
def from_instruction(
    cls,
    instr: Union['lowlevelil.LowLevelILInstruction', 'mediumlevelil.MediumLevelILInstruction', 'highlevelil.HighLevelILInstruction'],
    il_direct: bool = True
) -> 'ILSourceLocation':
    """
    Get the source location of a given instruction

    The result copies ``instr.address`` and ``instr.source_operand`` and also keeps a
    reference to ``instr`` in the ``source_*_instruction`` field that matches its IL
    level. An instruction of none of the three known types only produces a warning.

    :param instr: Instruction, Low, Medium, or High level
    :param il_direct: value stored in the returned location's ``il_direct`` field
    :return: Its location
    """
    location = cls(instr.address, instr.source_operand)

    # Work out which field remembers the originating instruction; the checks run
    # in LLIL, MLIL, HLIL order and stop at the first match.
    if isinstance(instr, lowlevelil.LowLevelILInstruction):
        origin_field = "source_llil_instruction"
    elif isinstance(instr, mediumlevelil.MediumLevelILInstruction):
        origin_field = "source_mlil_instruction"
    elif isinstance(instr, highlevelil.HighLevelILInstruction):
        origin_field = "source_hlil_instruction"
    else:
        origin_field = None

    if origin_field is None:
        log_warn(f"Unknown instruction type {type(instr)}")
    else:
        setattr(location, origin_field, instr)

    location.il_direct = il_direct
    return location

python/examples/wf_test_copy_expr.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def assert_llil_eq(old_insn: LowLevelILInstruction, new_insn: LowLevelILInstruct
6767
assert old_op[1] == new_op[1], err_msg
6868

6969

70-
def assert_mlil_eq(old_insn: LowLevelILInstruction, new_insn: LowLevelILInstruction):
70+
def assert_mlil_eq(old_insn: MediumLevelILInstruction, new_insn: MediumLevelILInstruction):
7171
"""
7272
Make sure that these two instructions are the same (probably correct). Asserts otherwise.
7373
@@ -78,10 +78,12 @@ def assert_mlil_eq(old_insn: LowLevelILInstruction, new_insn: LowLevelILInstruct
7878
"""
7979
err_msg = (hex(old_insn.address), old_insn, new_insn)
8080
assert old_insn.operation == new_insn.operation, err_msg
81-
# assert old_insn.attributes == new_insn.attributes, err_msg
81+
assert old_insn.attributes == new_insn.attributes, err_msg
8282
assert old_insn.size == new_insn.size, err_msg
8383
assert old_insn.source_location == new_insn.source_location, err_msg
8484
assert len(old_insn.operands) == len(new_insn.operands), err_msg
85+
assert old_insn.expr_type == new_insn.expr_type, err_msg
86+
8587
# Can't compare operands directly since IL expression indices might change when
8688
# copying an instruction to another function
8789
for i, (old_op, new_op) in enumerate(zip(old_insn.detailed_operands, new_insn.detailed_operands)):
@@ -241,30 +243,45 @@ def translate_instr(
241243
assert_mlil_eq(old_insn, new_insn)
242244

243245

244-
wf = Workflow("core.function.metaAnalysis").clone("core.function.metaAnalysis")

# Define the custom activity configurations. The three copy_expr checks differ only
# in name, title, description and callback; every one of them gets the same
# eligibility block with "auto" defaulting to False.
# NOTE(review): presumably that keeps the activities from running unless enabled --
# confirm against the Workflow/Activity documentation.
_copy_expr_activities = (
    (
        {
            "name": "extension.test_copy_expr.lil_action",
            "title": "Lifted IL copy_expr Test",
            "description": "Makes sure copy_expr works on Lifted IL functions.",
        },
        lil_action,
    ),
    (
        {
            "name": "extension.test_copy_expr.llil_action",
            "title": "Low Level IL copy_expr Test",
            "description": "Makes sure copy_expr works on Low Level IL functions.",
        },
        llil_action,
    ),
    (
        {
            "name": "extension.test_copy_expr.mlil_action",
            "title": "Medium Level IL copy_expr Test",
            "description": "Makes sure copy_expr works on Medium Level IL functions.",
        },
        mlil_action,
    ),
)

for _activity_config, _activity_action in _copy_expr_activities:
    # "eligibility" is added last so the serialized key order stays
    # name, title, description, eligibility.
    _activity_json = json.dumps({
        **_activity_config,
        "eligibility": {
            "auto": {
                "default": False
            }
        },
    })
    wf.register_activity(Activity(
        configuration=_activity_json,
        action=_activity_action
    ))

python/mediumlevelil.py

Lines changed: 90 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3302,6 +3302,11 @@ def __init__(
33023302
self._arch = _arch
33033303
self._source_function = _source_function
33043304

3305+
self._mlil_to_mlil_expr_map: dict['MediumLevelILInstruction', List[Tuple[ExpressionIndex, bool]]] = {}
3306+
self._mlil_to_mlil_instr_map: dict['MediumLevelILInstruction', List[Tuple[InstructionIndex, bool]]] = {}
3307+
self._llil_ssa_to_mlil_expr_map: dict['lowlevelil.LowLevelILInstruction', List[Tuple[ExpressionIndex, bool]]] = {}
3308+
self._llil_ssa_to_mlil_instr_map: dict['lowlevelil.LowLevelILInstruction', List[Tuple[InstructionIndex, bool]]] = {}
3309+
33053310
def __del__(self):
33063311
if core is not None:
33073312
core.BNFreeMediumLevelILFunction(self.handle)
@@ -3548,7 +3553,7 @@ def expr(
35483553
elif isinstance(operation, MediumLevelILOperation):
35493554
_operation = operation.value
35503555
if source_location is not None:
3551-
return ExpressionIndex(core.BNMediumLevelILAddExprWithLocation(
3556+
index = ExpressionIndex(core.BNMediumLevelILAddExprWithLocation(
35523557
self.handle,
35533558
_operation,
35543559
source_location.address,
@@ -3560,6 +3565,17 @@ def expr(
35603565
d,
35613566
e
35623567
))
3568+
# Update internal mappings to remember this
3569+
if source_location.source_mlil_instruction is not None:
3570+
if source_location.source_mlil_instruction not in self._mlil_to_mlil_expr_map:
3571+
self._mlil_to_mlil_expr_map[source_location.source_mlil_instruction] = []
3572+
self._mlil_to_mlil_expr_map[source_location.source_mlil_instruction].append((index, source_location.il_direct))
3573+
if source_location.source_llil_instruction is not None \
3574+
and source_location.source_llil_instruction.function.il_form == FunctionGraphType.LowLevelILSSAFormFunctionGraph:
3575+
if source_location.source_llil_instruction not in self._llil_ssa_to_mlil_expr_map:
3576+
self._llil_ssa_to_mlil_expr_map[source_location.source_llil_instruction] = []
3577+
self._llil_ssa_to_mlil_expr_map[source_location.source_llil_instruction].append((index, source_location.il_direct))
3578+
return index
35633579
else:
35643580
return ExpressionIndex(core.BNMediumLevelILAddExpr(self.handle, _operation, size, a, b, c, d, e))
35653581

@@ -3946,7 +3962,7 @@ def translate(
39463962
for instr_index in range(block.start, block.end):
39473963
instr: MediumLevelILInstruction = self[InstructionIndex(instr_index)]
39483964
propagated_func.set_current_address(instr.address, block.arch)
3949-
propagated_func.append(expr_handler(propagated_func, block, instr))
3965+
propagated_func.append(expr_handler(propagated_func, block, instr), ILSourceLocation.from_instruction(instr))
39503966

39513967
return propagated_func
39523968

@@ -3970,15 +3986,85 @@ def set_expr_attributes(self, expr: InstructionOrExpression, value: ILInstructio
39703986
result |= flag.value
39713987
core.BNSetMediumLevelILExprAttributes(self.handle, expr, result)
39723988

3973-
def append(self, expr: ExpressionIndex, source_location: Optional['ILSourceLocation'] = None) -> InstructionIndex:
    """
    ``append`` adds the ExpressionIndex ``expr`` to the current MediumLevelILFunction.

    If ``source_location`` is given and carries a source instruction, the new
    instruction index is also recorded (together with the location's ``il_direct``
    flag) in this function's internal mapping tables, keyed by that source instruction.

    :param ExpressionIndex expr: the ExpressionIndex to add to the current MediumLevelILFunction
    :param ILSourceLocation source_location: optional location describing which instruction ``expr`` came from
    :return: Index of added instruction in the current function
    :rtype: int
    """
    new_index = InstructionIndex(core.BNMediumLevelILAddInstruction(self.handle, expr))
    if source_location is None:
        return new_index

    # Update internal mappings to remember this
    mlil_origin = source_location.source_mlil_instruction
    if mlil_origin is not None:
        self._mlil_to_mlil_instr_map.setdefault(mlil_origin, []).append(
            (new_index, source_location.il_direct)
        )

    llil_origin = source_location.source_llil_instruction
    # An LLIL origin is only tracked when its function is in LLIL SSA form
    if llil_origin is not None \
            and llil_origin.function.il_form == FunctionGraphType.LowLevelILSSAFormFunctionGraph:
        self._llil_ssa_to_mlil_instr_map.setdefault(llil_origin, []).append(
            (new_index, source_location.il_direct)
        )

    return new_index
4011+
4012+
def _get_llil_ssa_to_mlil_instr_map(self, from_builders: bool) -> 'LLILSSAToMLILInstructionMapping':
    """
    Build the mapping from LLIL SSA instruction index to MLIL instruction index.

    :param from_builders: if True, derive the mapping from the records collected in \
    ``self._mlil_to_mlil_instr_map`` (old MLIL instruction -> list of \
    ``(new instruction index, direct)`` pairs); if False, query this function for \
    every one of its instructions instead
    :return: dict of LLIL SSA instruction index -> MLIL instruction index in this function
    """
    llil_ssa_to_mlil_instr_map = {}

    if not from_builders:
        for instr in self.instructions:
            llil_ssa_index = self.get_low_level_il_instruction_index(instr.instr_index)
            llil_ssa_to_mlil_instr_map[llil_ssa_index] = instr.instr_index
        return llil_ssa_to_mlil_instr_map

    for old_instr, new_entries in self._mlil_to_mlil_instr_map.items():
        # Instructions are always mapped 1 to 1. If the map is marked indirect
        # then just ignore it
        direct_indices = [new_index for (new_index, is_direct) in new_entries if is_direct]
        if not direct_indices:
            continue

        # Look up the LLIL SSA instruction for the old instr in its function
        # (once per old instruction) and then store that mapping for the new function.
        # NOTE(review): if this lookup can return None for an unmapped instruction,
        # a None key ends up in the result -- confirm against the API.
        old_llil_ssa_index = old_instr.function.get_low_level_il_instruction_index(old_instr.instr_index)

        # When several direct entries exist the last one recorded wins
        llil_ssa_to_mlil_instr_map[old_llil_ssa_index] = direct_indices[-1]

    return llil_ssa_to_mlil_instr_map
4035+
4036+
def _get_llil_ssa_to_mlil_expr_map(self, from_builders: bool) -> 'LLILSSAToMLILExpressionMapping':
    """
    Build the mapping from LLIL SSA expression index to a list of MLIL expression indices.

    In both modes the expression that is the direct counterpart of an LLIL SSA
    expression is placed at the front of that expression's list; all other
    expressions are appended in the order they are encountered.

    :param from_builders: if True, derive the mapping from the records collected in \
    ``self._mlil_to_mlil_expr_map`` (old MLIL expression -> list of \
    ``(new expression index, direct)`` pairs); if False, query this function for \
    every expression of every instruction instead
    :return: dict of LLIL SSA expression index -> list of MLIL expression indices
    """
    llil_ssa_to_mlil_expr_map = {}

    if from_builders:
        for old_expr, new_entries in self._mlil_to_mlil_expr_map.items():
            # Look up the LLIL SSA expressions for the old expr in its function
            # and then store that mapping for the new function
            old_function = old_expr.function
            old_llil_ssa_direct = old_function.get_low_level_il_expr_index(old_expr.expr_index)
            old_llil_ssa_indices = old_function.get_low_level_il_expr_indexes(old_expr.expr_index)

            for old_index in old_llil_ssa_indices:
                new_exprs = llil_ssa_to_mlil_expr_map.setdefault(old_index, [])
                for new_index, new_direct in new_entries:
                    # A new expression is primary only if it was recorded as a direct
                    # copy AND the old expression was the direct counterpart of this
                    # LLIL SSA expression; this mirrors the ordering used below.
                    if new_direct and old_index == old_llil_ssa_direct:
                        new_exprs.insert(0, new_index)
                    else:
                        new_exprs.append(new_index)
        return llil_ssa_to_mlil_expr_map

    for instr in self.instructions:
        for expr in instr.traverse(lambda e: e):
            llil_ssa_direct = self.get_low_level_il_expr_index(expr.expr_index)
            llil_ssa_indices = self.get_low_level_il_expr_indexes(expr.expr_index)
            for llil_ssa_index in llil_ssa_indices:
                mlil_exprs = llil_ssa_to_mlil_expr_map.setdefault(llil_ssa_index, [])
                if llil_ssa_index == llil_ssa_direct:
                    # Direct counterpart goes first
                    mlil_exprs.insert(0, expr.expr_index)
                else:
                    mlil_exprs.append(expr.expr_index)

    return llil_ssa_to_mlil_expr_map
39824068

39834069
def nop(self, loc: Optional['ILSourceLocation'] = None) -> ExpressionIndex:
39844070
"""

python/workflow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,10 @@ def set_mlil_function(
130130
one or more MLIL expressions (first expression
131131
will be the primary)
132132
"""
133-
134133
if llil_ssa_to_mlil_instr_map is None or llil_ssa_to_mlil_expr_map is None:
135-
llil_ssa_to_mlil_instr_map = {}
136-
llil_ssa_to_mlil_expr_map = {}
134+
# Build up maps from existing data in the function
135+
llil_ssa_to_mlil_instr_map = new_func._get_llil_ssa_to_mlil_instr_map(True)
136+
llil_ssa_to_mlil_expr_map = new_func._get_llil_ssa_to_mlil_expr_map(True)
137137

138138
# Number of instructions
139139
instr_count = 0

0 commit comments

Comments
 (0)