Skip to content

Commit b2234ee

Browse files
committed
ASTI: introduce basic block fragment for predicated instructions
1 parent 3f207d8 commit b2234ee

File tree

6 files changed

+291
-10
lines changed

6 files changed

+291
-10
lines changed

chb/app/BasicBlock.py

Lines changed: 173 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# ------------------------------------------------------------------------------
55
# The MIT License (MIT)
66
#
7-
# Copyright (c) 2021-2024 Aarno Labs LLC
7+
# Copyright (c) 2021-2025 Aarno Labs LLC
88
#
99
# Permission is hereby granted, free of charge, to any person obtaining a copy
1010
# of this software and associated documentation files (the "Software"), to deal
@@ -52,12 +52,135 @@
5252
import chb.app.Function
5353

5454

55+
class BasicBlockFragment:
    """A slice of a basic block that needs no CFG-level control flow.

    ARM instructions may be predicated, e.g.:

       MOVEQ R0, R1 : if condition then R0 := R1

    In the ocaml analyzer this additional control flow can be accommodated
    directly in CHIF, without creating a separate basic block in the CFG
    (i.e., lightweight control flow). In decompilation some of these
    instructions may be accommodated without explicit control flow (e.g.,
    by the C ternary operation), but this is not always possible.

    Even when predicated instructions cannot be converted into expressions,
    it is not necessary to create top-level basic blocks that are subject
    to the CFG-to-AST conversion. A more light-weight solution is to embed
    the necessary control flow (limited to branches and instruction
    sequences) within the block created for the original basic block.

    The basic block is partitioned into a linear sequence of fragments,
    where each fragment is one of the following:
    - a (linear) instruction sequence statement
    - a branch statement with a condition and a single (then) branch
    - a branch statement with a condition, a then branch and an else branch

    Either branch of a branch statement can hold one or more instructions
    that share the same condition setter.

    A fragment is created from its first instruction; the kind of that
    instruction (predicated or not) fixes the kind of the fragment.
    """

    def __init__(self, instr: Instruction) -> None:
        # condition code and condition-setter address of a predicated
        # fragment; both stay None for a linear fragment
        self._condition: Optional[str] = None
        self._setter: Optional[str] = None
        # instruction lists; a fragment uses either _linear or the branches
        self._linear: List[Instruction] = []
        self._thenbranch: List[Instruction] = []
        self._elsebranch: List[Instruction] = []
        self.add_instr(instr)

    @property
    def condition(self) -> Optional[str]:
        """Condition code of the then-branch, or None for a linear fragment."""
        return self._condition

    @property
    def setter(self) -> Optional[str]:
        """Condition setter recorded from the first predicated instruction."""
        return self._setter

    @property
    def is_predicated(self) -> bool:
        """True if a condition code has been recorded for this fragment."""
        return self.condition is not None

    @property
    def is_then_only(self) -> bool:
        """True if the fragment is predicated and has no else-branch."""
        return self.is_predicated and not self.elsebranch

    @property
    def linear(self) -> List[Instruction]:
        """Instructions of a linear (unpredicated) fragment."""
        return self._linear

    @property
    def thenbranch(self) -> List[Instruction]:
        """Instructions whose condition code equals the fragment condition."""
        return self._thenbranch

    @property
    def elsebranch(self) -> List[Instruction]:
        """Predicated instructions with a different condition code."""
        return self._elsebranch

    @property
    def is_empty(self) -> bool:
        """True if no instruction has been added to any of the lists."""
        return not (self.linear or self.thenbranch or self.elsebranch)

    def add_predicated_instr(self, instr: Instruction) -> None:
        """Add a predicated instruction to the then- or else-branch.

        The first instruction added determines the condition and setter of
        the fragment. Later instructions go to the then-branch if their
        condition code equals the fragment condition and to the else-branch
        otherwise.

        Raises:
            UF.CHBError: if the fragment already holds linear instructions.
        """
        if self.is_empty:
            self._condition = instr.get_instruction_cc()
            self._setter = instr.get_instruction_condition_setter()
            self.thenbranch.append(instr)
            return

        if not self.is_predicated:
            raise UF.CHBError("Cannot add predicated instruction to linear frag")

        if self.condition == instr.get_instruction_cc():
            branch = self.thenbranch
        else:
            branch = self.elsebranch
        branch.append(instr)

    def add_linear_instr(self, instr: Instruction) -> None:
        """Add an unpredicated instruction to the linear sequence.

        Raises:
            UF.CHBError: if the fragment is a (non-empty) predicated fragment.
        """
        if not self.is_empty and self.is_predicated:
            raise UF.CHBError(
                "Cannot add unpredicated instruction to predicate fragment")
        self.linear.append(instr)

    def add_instr(self, instr: Instruction) -> None:
        """Add instr, dispatching on whether it has control flow."""
        if instr.has_control_flow():
            add = self.add_predicated_instr
        else:
            add = self.add_linear_instr
        add(instr)

    def __str__(self) -> str:
        lines: List[str] = []
        if self.condition:
            suffix = " (" + self.setter + ")" if self.setter else ""
            lines.append("condition: " + self.condition + suffix)
        sections = (
            ("linear", self.linear),
            ("then:", self.thenbranch),
            ("else:", self.elsebranch))
        for (header, members) in sections:
            # sections without instructions are omitted altogether
            if members:
                lines.append(header)
                lines.extend(" " + str(member) for member in members)
        return "\n".join(lines)
171+
172+
55173
class BasicBlock(ABC):
56174

57175
def __init__(
58176
self,
59177
xnode: ET.Element) -> None:
60178
self.xnode = xnode
179+
self._partition: Dict[str, BasicBlockFragment] = {}
180+
181+
@property
def partition(self) -> Dict[str, BasicBlockFragment]:
    """Fragments of this block, keyed by first-instruction address.

    The mapping is empty until partition_control_flow has been called.
    """
    fragments = self._partition
    return fragments
61184

62185
@property
63186
def baddr(self) -> str:
@@ -99,6 +222,55 @@ def has_return(self) -> bool:
99222
def has_conditional_return(self) -> bool:
100223
return self.last_instruction.is_conditional_return_instruction
101224

225+
def has_control_flow(self) -> bool:
    """Return True if this block needs block-internal control flow.

    That is the case if the block contains predicated instructions that
    are not otherwise covered in aggregates or by other conditions.

    The case of a block whose only predicated instruction is a conditional
    return is already handled in the Cfg, so it is excluded here.

    The case of a block with a conditional return and other conditional
    instructions is not yet handled.
    """

    # Count once; the count answers both the single-conditional-return
    # question and the "any predicated instruction" question.
    predicated = sum(
        1 for i in self.instructions.values() if i.has_control_flow())

    if predicated == 1 and self.has_conditional_return:
        return False

    return predicated > 0
241+
242+
def partition_control_flow(self) -> None:
243+
curblock: Optional[BasicBlockFragment] = None
244+
curaddr: Optional[str] = None
245+
246+
for (a, i) in sorted(self.instructions.items()):
247+
if curaddr is None or curblock is None:
248+
curaddr = a
249+
curblock = BasicBlockFragment(i)
250+
else:
251+
if i.has_control_flow():
252+
if curblock.is_predicated:
253+
if i.get_instruction_condition_setter() == curblock.setter:
254+
curblock.add_instr(i)
255+
else:
256+
self._partition[curaddr] = curblock
257+
curblock = BasicBlockFragment(i)
258+
curaddr = a
259+
else:
260+
self._partition[curaddr] = curblock
261+
curblock = BasicBlockFragment(i)
262+
curaddr = a
263+
else:
264+
if curblock.is_predicated:
265+
self._partition[curaddr] = curblock
266+
curblock = BasicBlockFragment(i)
267+
curaddr = a
268+
else:
269+
curblock.add_instr(i)
270+
271+
if curaddr is not None and curblock is not None:
272+
self._partition[curaddr] = curblock
273+
102274
@property
103275
@abstractmethod
104276
def instructions(self) -> Mapping[str, Instruction]:

chb/app/Cfg.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -687,13 +687,14 @@ def ast(self,
687687
succ = self.successors(n)[0]
688688
instr = astblock.last_instruction
689689
rv = instr.return_value()
690+
rvcondition = instr.ast_condition(astree)
690691
astexpr: Optional[AST.ASTExpr] = None
691692
if rv is not None and not astree.returns_void():
692693
astexpr = XU.xxpr_to_ast_def_expr(
693694
rv, instr.xdata, instr.iaddr, astree)
694695
rtnstmt = astree.mk_return_stmt(
695696
astexpr, instr.iaddr, instr.bytestring)
696-
rvcondition = instr.ast_condition(astree)
697+
697698
if rvcondition is not None:
698699
elsebr = astree.mk_instr_sequence([])
699700
brstmt = cast(AST.ASTBranch, astree.mk_branch(
@@ -702,7 +703,15 @@ def ast(self,
702703
else:
703704
blockstmts[n] = [blocknode, rtnstmt]
704705
else:
705-
blockstmts[n] = [blocknode]
706+
rtnstmt = astree.mk_return_stmt(
707+
None, instr.iaddr, instr.bytestring)
708+
if rvcondition is not None:
709+
elsebr = astree.mk_instr_sequence([])
710+
brstmt = cast(AST.ASTBranch, astree.mk_branch(
711+
rvcondition, rtnstmt, elsebr, succ))
712+
blockstmts[n] = [blocknode, brstmt]
713+
else:
714+
blockstmts[n] = [blocknode, rtnstmt]
706715

707716
elif astblock.has_return:
708717
instr = astblock.last_instruction

chb/app/InstrXData.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,22 @@ def get_return_cxpr(self) -> XXpr:
642642
raise UF.CHBError("Unexpected error in C return value")
643643
return self.xprdictionary.xpr(rval)
644644

645+
@property
def is_predicate_assignment(self) -> bool:
    """True if the tags of this instruction include 'agg:predassign'."""
    tag = "agg:predassign"
    return tag in self.tags
648+
649+
@property
def is_nondet_predicate_assignment(self) -> bool:
    """True if the tags of this instruction include 'agg:predassign:nd'."""
    tag = "agg:predassign:nd"
    return tag in self.tags
652+
653+
@property
def is_ternary_assignment(self) -> bool:
    """True if the tags of this instruction include 'agg:ternassign'."""
    tag = "agg:ternassign"
    return tag in self.tags
656+
657+
@property
def is_nondet_ternary_assignment(self) -> bool:
    """True if the tags of this instruction include 'agg:ternassign:nd'."""
    tag = "agg:ternassign:nd"
    return tag in self.tags
660+
645661
@property
646662
def is_aggregate_jumptable(self) -> bool:
647663
return "agg-jt" in self.tags

chb/app/Instruction.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# ------------------------------------------------------------------------------
55
# The MIT License (MIT)
66
#
7-
# Copyright (c) 2021-2024 Aarno Labs LLC
7+
# Copyright (c) 2021-2025 Aarno Labs LLC
88
#
99
# Permission is hereby granted, free of charge, to any person obtaining a copy
1010
# of this software and associated documentation files (the "Software"), to deal
@@ -145,6 +145,18 @@ def rev_bytestring(self) -> str:
145145
revb = "".join(i+j for i, j in zip(b[:-1][::-2], b[::-2]))
146146
return revb
147147

148+
def has_control_flow(self) -> bool:
    """Return True if this instruction is predicated and not covered
    by an enclosing aggregate or other condition.

    This is signalled by the presence of a 'brcc' attribute on the xml
    node of the instruction.
    """

    brcc = self.xnode.get("brcc")
    return brcc is not None
153+
154+
def get_instruction_cc(self) -> Optional[str]:
    """Return the 'brcc' attribute of the xml node, or None if absent."""

    node = self.xnode
    return node.get("brcc")
156+
157+
def get_instruction_condition_setter(self) -> Optional[str]:
    """Return the 'brsetter' attribute of the xml node, or None if absent."""

    node = self.xnode
    return node.get("brsetter")
159+
148160
def md5(self) -> str:
149161
m = hashlib.md5()
150162
m.update(self.bytestring.encode("utf-8"))
@@ -292,10 +304,16 @@ def ast_prov(self, astree: ASTInterface) -> Tuple[
292304
def is_condition_true(self) -> bool:
293305
return False
294306

295-
def ast_condition_prov(self, astree: ASTInterface, reverse: bool = False) -> Tuple[
296-
Optional[AST.ASTExpr], Optional[AST.ASTExpr]]:
307+
def ast_condition_prov(
        self,
        astree: ASTInterface,
        reverse: bool = False
) -> Tuple[Optional[AST.ASTExpr], Optional[AST.ASTExpr]]:
    """Placeholder that always raises in this class.

    Raises:
        UF.CHBError: always.
    """
    raise UF.CHBError("ast-condition-prov not defined")
298311

312+
def ast_cc_condition_prov(
        self, astree: ASTInterface
) -> Tuple[Optional[AST.ASTExpr], Optional[AST.ASTExpr]]:
    """Placeholder that always raises in this class.

    Raises:
        UF.CHBError: always.
    """
    # message spelling corrected (was "ast-cc-codntiion-prov")
    raise UF.CHBError("ast-cc-condition-prov not defined")
316+
299317
def assembly_ast_condition(
300318
self,
301319
astree: ASTInterface,

chb/astinterface/ASTInterfaceBasicBlock.py

Lines changed: 49 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# ------------------------------------------------------------------------------
55
# The MIT License (MIT)
66
#
7-
# Copyright (c) 2022-2024 Aarno Labs LLC
7+
# Copyright (c) 2022-2025 Aarno Labs LLC
88
#
99
# Permission is hereby granted, free of charge, to any person obtaining a copy
1010
# of this software and associated documentation files (the "Software"), to deal
@@ -26,7 +26,7 @@
2626
# ------------------------------------------------------------------------------
2727
"""Basic block in an abstract syntax tree."""
2828

29-
from typing import cast, Dict, List, Optional, Set, TYPE_CHECKING
29+
from typing import cast, Dict, List, Optional, Set, Tuple, TYPE_CHECKING
3030

3131
import chb.ast.ASTNode as AST
3232

@@ -40,7 +40,7 @@
4040

4141
if TYPE_CHECKING:
4242
from chb.arm.ARMCfgBlock import ARMCfgBlock
43-
from chb.app.BasicBlock import BasicBlock
43+
from chb.app.BasicBlock import BasicBlock, BasicBlockFragment
4444
from chb.arm.ARMInstruction import ARMInstruction
4545
from chb.arm.opcodes.ARMLogicalShiftLeft import ARMLogicalShiftLeft
4646
from chb.arm.opcodes.ARMReverseSubtract import ARMReverseSubtract
@@ -168,12 +168,57 @@ def assembly_ast(self, astree: "ASTInterface") -> AST.ASTStmt:
168168
instrs.extend(i.assembly_ast(astree))
169169
return astree.mk_instr_sequence(instrs)
170170

171+
def ast_fragment(
        self,
        astree: "ASTInterface",
        frag: "BasicBlockFragment") -> AST.ASTStmt:
    """Return the ast statement for a single basic block fragment.

    A linear fragment becomes an instruction sequence. A predicated
    fragment becomes a branch statement whose condition is taken from the
    first instruction of the then-branch; if that instruction provides no
    condition expression a warning is logged and a temporary lval
    expression is used as condition instead.
    """
    if not frag.is_predicated:
        plain = [ASTInterfaceInstruction(i) for i in frag.linear]
        return self.linear_ast(astree, plain)

    then_items = [ASTInterfaceInstruction(i) for i in frag.thenbranch]
    else_items = [ASTInterfaceInstruction(i) for i in frag.elsebranch]

    # both branch bodies are built before the condition is requested
    then_stmt = self.linear_ast(astree, then_items)
    else_stmt = self.linear_ast(astree, else_items)

    guard = then_items[0]
    condition = guard.ast_cc_condition(astree)
    if condition is None:
        chklogger.logger.warning(
            "No instruction predicate expression found at address %s",
            guard.iaddr)
        condition = astree.mk_temp_lval_expression()

    return astree.mk_branch(condition, then_stmt, else_stmt, "0x0")
189+
190+
def fragmented_ast(self, astree: "ASTInterface") -> AST.ASTStmt:
    """Return a block statement with one statement per fragment.

    Fragments are taken from the partition of the underlying basic block,
    in sorted order of their address keys.

    Raises:
        UF.CHBError: if the basic block has no partition.
    """

    fragments = self.basicblock.partition
    if not fragments:
        raise UF.CHBError("Error in fragmented ast")

    stmts: List[AST.ASTStmt] = [
        self.ast_fragment(astree, fragment)
        for (_, fragment) in sorted(fragments.items())]

    return astree.mk_block(stmts)
202+
171203
def ast(self, astree: "ASTInterface") -> AST.ASTStmt:
    """Return the ast statement for this block.

    A trampoline block is delegated to trampoline_ast. A block for which
    the basic block reports control flow is partitioned into fragments and
    converted by fragmented_ast. Any other block is converted into a
    single instruction sequence, ordered by instruction address.
    """
    if self.is_trampoline:
        return self.trampoline_ast(astree)

    if not self.basicblock.has_control_flow():
        ordered = sorted(self.instructions.values(), key=lambda p: p.iaddr)
        return self.linear_ast(astree, ordered)

    self.basicblock.partition_control_flow()
    return self.fragmented_ast(astree)
215+
216+
def linear_ast(
        self,
        astree: "ASTInterface",
        instritems: List[ASTInterfaceInstruction]) -> AST.ASTStmt:
    """Return an instruction sequence for the given instructions.

    The ast instructions of each item are concatenated in the order in
    which the items are given.
    """
    sequence: List[AST.ASTInstruction] = [
        astinstr
        for item in instritems
        for astinstr in item.ast(astree)]
    return astree.mk_instr_sequence(sequence)
179224

0 commit comments

Comments
 (0)