Skip to content

Commit 61d1c07

Browse files
committed
AST: facility to visualize reaching defs
1 parent d53121c commit 61d1c07

File tree

9 files changed

+347
-27
lines changed

9 files changed

+347
-27
lines changed

chb/app/CHVersion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
chbversion: str = "0.3.0-20250805"
1+
chbversion: str = "0.3.0-20250808"

chb/app/Cfg.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,14 @@ def successors(self, src: str) -> Sequence[str]:
815815
else:
816816
return []
817817

818+
def exitblocks(self) -> Sequence[str]:
    """Return the addresses of all blocks without outgoing edges.

    A block is reported when it either has no entry in ``self.edges`` or
    its entry is empty. The result follows the iteration order of
    ``self.blocks``.
    """
    # edges.get(b) is None for a missing entry and empty for a block
    # without successors; both are falsy.
    return [b for b in self.blocks if not self.edges.get(b)]
825+
818826
def __str__(self) -> str:
819827
lines: List[str] = []
820828
lines.append("Basic blocks: ")

chb/app/Function.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,12 @@ def block(self, baddr: str) -> BasicBlock:
474474
else:
475475
raise UF.CHBError("Block " + baddr + " not found in " + self.faddr)
476476

477+
def containing_block(self, iaddr: str) -> str:
    """Return the address of the first block that holds instruction iaddr.

    Blocks are examined in the iteration order of ``self.blocks``.

    Raises:
        UF.CHBError: if no block reports having the instruction.
    """
    blockaddr = next(
        (addr for (addr, blk) in self.blocks.items()
         if blk.has_instruction(iaddr)),
        None)
    if blockaddr is None:
        raise UF.CHBError("Containing block not found for instruction address " + iaddr)
    return blockaddr
482+
477483
def load_instructions(self) -> Mapping[str, Sequence[Instruction]]:
478484
"""Return a mapping of block address to instructions that save to memory."""
479485

chb/app/Instruction.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,17 @@ def rhs_expressions(self, filter: Callable[[XXpr], bool]) -> List[XXpr]:
291291
def return_value(self) -> Optional[XXpr]:
292292
return None
293293

294+
def reaching_definitions(self, var: str) -> List[str]:
    """Return the definition locations recorded for variable var.

    Walks ``self.xdata.reachingdefs``, skips entries that are None or
    whose variable does not print as ``var``, and collects the string
    form of each definition location. Duplicates are dropped; order of
    first appearance is kept.
    """
    found: List[str] = []
    for rd in self.xdata.reachingdefs:
        if rd is None or str(rd.variable) != var:
            continue
        for defloc in rd.deflocations:
            locname = str(defloc)
            if locname not in found:
                found.append(locname)
    return found
304+
294305
def assembly_ast(self, astree: ASTInterface) -> List[AST.ASTInstruction]:
295306
raise UF.CHBError("assembly-ast not defined")
296307

chb/arm/opcodes/ARMBranch.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,9 @@ def opargs(self) -> List[ARMOperand]:
216216
def ft_conditions(self, xdata: InstrXData) -> Sequence[XXpr]:
217217
xd = ARMBranchXData(xdata)
218218
if xdata.has_branch_conditions():
219-
if xd.is_ok:
219+
if xd.is_ctcond_ok:
220+
return [xd.cfcond, xd.ctcond]
221+
elif xd.is_ok:
220222
return [xd.fcond, xd.tcond]
221223
else:
222224
return [xd.fxpr, xd.txpr]

chb/cmdline/astcmds.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,13 @@
5757
from chb.cmdline.PatchResults import PatchResults, PatchEvent
5858
import chb.cmdline.XInfo as XI
5959

60+
from chb.graphics.DotRdefPath import DotRdefPath
61+
6062
from chb.userdata.UserHints import UserHints
6163

64+
import chb.util.dotutil as UD
6265
import chb.util.fileutil as UF
66+
import chb.util.graphutil as UG
6367
from chb.util.loggingutil import chklogger, LogLevel
6468

6569

@@ -144,8 +148,8 @@ def buildast(args: argparse.Namespace) -> NoReturn:
144148
xpatchresultsfile = args.patch_results_file
145149
hide_globals: bool = args.hide_globals
146150
hide_annotations: bool = args.hide_annotations
147-
remove_edges: List[str] = args.remove_edges
148-
add_edges: List[str] = args.add_edges
151+
show_reachingdefs: str = args.show_reachingdefs
152+
output_reachingdefs: str = args.output_reachingdefs
149153
verbose: bool = args.verbose
150154
loglevel: str = args.loglevel
151155
logfilename: Optional[str] = args.logfilename
@@ -386,6 +390,57 @@ def buildast(args: argparse.Namespace) -> NoReturn:
386390
functions_failed += 1
387391
continue
388392

393+
if show_reachingdefs is not None:
394+
if output_reachingdefs is None:
395+
UC.print_error("\nSpecify a file to save the reaching defs")
396+
continue
397+
398+
rdefspec = show_reachingdefs.split(":")
399+
if len(rdefspec) != 2:
400+
UC.print_error(
401+
"\nArgument to show_reachingdefs not recognized")
402+
continue
403+
404+
useloc = rdefspec[0]
405+
register = rdefspec[1]
406+
407+
if not f.has_instruction(useloc):
408+
UC.print_status_update("Useloc: " + useloc + " not found")
409+
continue
410+
411+
tgtinstr = f.instruction(useloc)
412+
413+
if not register in f.rdef_locations():
414+
UC.print_status_update(
415+
"Register " + register + " not found in rdeflocations")
416+
continue
417+
418+
cblock = f.containing_block(useloc)
419+
graph = UG.DirectedGraph(list(f.cfg.blocks.keys()), f.cfg.edges)
420+
rdefs = tgtinstr.reaching_definitions(register)
421+
dotpaths: List[DotRdefPath] = []
422+
graph.find_paths(f.faddr, cblock)
423+
for (i, p) in enumerate(
424+
sorted(graph.get_paths(), key=lambda p: len(p))):
425+
cfgpath = DotRdefPath(
426+
"path" + str(i),
427+
f,
428+
astinterface,
429+
p,
430+
subgraph=True,
431+
nodeprefix = str(i) +":",
432+
rdefinstrs = rdefs)
433+
dotpaths.append(cfgpath)
434+
435+
pdffilename = UD.print_dot_subgraphs(
436+
app.path,
437+
"paths",
438+
output_reachingdefs,
439+
"pdf",
440+
[dotcfg.build() for dotcfg in dotpaths])
441+
442+
UC.print_status_update("Printed " + pdffilename)
443+
389444
else:
390445
UC.print_error("Unable to find function " + faddr)
391446
functions_failed += 1

chb/cmdline/chkx

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -730,19 +730,15 @@ def parse() -> argparse.Namespace:
730730
"--hide_annotations",
731731
help="do not include annotations in printed C code",
732732
action="store_true")
733-
buildast.add_argument(
734-
"--remove_edges",
735-
nargs="*",
736-
default=[],
737-
help="list of edges to be removed (in the form faddr:src-addr:tgt-addr in hex)")
738-
buildast.add_argument(
739-
"--add_edges",
740-
nargs="*",
741-
default=[],
742-
help="list of edges to be added (in the form faddr:src-addr:tgt-addr in hex)")
743733
buildast.add_argument(
744734
"--verbose", "-v",
745735
action="store_true")
736+
buildast.add_argument(
737+
"--show_reachingdefs",
738+
help="create a dot file for the reaching defs of <addr>:<reg>")
739+
buildast.add_argument(
740+
"--output_reachingdefs",
741+
help="name of output file (without extension) to store dot/pdf file of reachingdefs")
746742
buildast.add_argument(
747743
"--loglevel", "-log",
748744
choices=UL.LogLevel.options(),

chb/graphics/DotRdefPath.py

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# ------------------------------------------------------------------------------
2+
# CodeHawk Binary Analyzer
3+
# Author: Henny Sipma
4+
# ------------------------------------------------------------------------------
5+
# The MIT License (MIT)
6+
#
7+
# Copyright (c) 2025 Aarno Labs LLC
8+
#
9+
# Permission is hereby granted, free of charge, to any person obtaining a copy
10+
# of this software and associated documentation files (the "Software"), to deal
11+
# in the Software without restriction, including without limitation the rights
12+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13+
# copies of the Software, and to permit persons to whom the Software is
14+
# furnished to do so, subject to the following conditions:
15+
#
16+
# The above copyright notice and this permission notice shall be included in all
17+
# copies or substantial portions of the Software.
18+
#
19+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25+
# SOFTWARE.
26+
# ------------------------------------------------------------------------------
27+
28+
from typing import Dict, List, Optional, TYPE_CHECKING
29+
30+
import chb.util.fileutil as UF
31+
32+
from chb.util.DotGraph import DotGraph
33+
34+
if TYPE_CHECKING:
35+
from chb.app.Function import Function
36+
from chb.app.Instruction import Instruction
37+
from chb.astinterface.ASTInterface import ASTInterface
38+
39+
40+
class DotRdefPath:
    """Dot graph of a single CFG path, annotated with reaching definitions.

    The path is a list of basic-block addresses. Blocks that contain one of
    the given reaching-definition instructions are labeled with the
    high-level form of those instructions and may be colored; edges out of
    two-way branches are labeled with the branch condition.
    """

    def __init__(
            self,
            graphname: str,
            fn: "Function",
            astree: "ASTInterface",
            path: List[str],
            nodeprefix: str = "",
            replacements: Optional[Dict[str, str]] = None,
            rdefinstrs: Optional[List[str]] = None,
            subgraph: bool = False) -> None:
        """Set up the path graph.

        Args:
            graphname: name given to the underlying DotGraph.
            fn: function the path belongs to.
            astree: AST interface passed to the instruction AST queries.
            path: block addresses, in path order.
            nodeprefix: string prepended to every node name.
            replacements: text substitutions used by replace_text
                (defaults to no substitutions).
            rdefinstrs: addresses of the defining instructions (hex
                strings, or the literal "init"); defaults to none.
            subgraph: passed through to DotGraph.
        """
        self._fn = fn
        self._graphname = graphname
        self._astree = astree
        self._path = path
        self._nodeprefix = nodeprefix
        self._subgraph = subgraph
        # None sentinels avoid sharing one mutable default object between
        # all instances.
        self._replacements = {} if replacements is None else replacements
        self._rdefinstrs = [] if rdefinstrs is None else rdefinstrs
        self._dotgraph = DotGraph(graphname, subgraph=self.subgraph)

    @property
    def function(self) -> "Function":
        return self._fn

    @property
    def graphname(self) -> str:
        return self._graphname

    @property
    def astree(self) -> "ASTInterface":
        return self._astree

    @property
    def path(self) -> List[str]:
        return self._path

    @property
    def nodeprefix(self) -> str:
        return self._nodeprefix

    @property
    def subgraph(self) -> bool:
        return self._subgraph

    def pathindex(self, baddr: str) -> int:
        """Return the position of the first occurrence of baddr in the path.

        Raises:
            UF.CHBError: if baddr is not on the path.
        """
        try:
            return self.path.index(baddr)
        except ValueError:
            raise UF.CHBError(
                "Address " + baddr + " not found in path") from None

    def build(self) -> "DotGraph":
        """Add all path nodes and edges to the dot graph and return it.

        An extra "init" node, connected to the first block of the path, is
        added when init_is_exposed() holds.
        """
        for n in self.path:
            self.add_node(n)

        for (src, tgt) in zip(self.path, self.path[1:]):
            self.add_edge(src, tgt)

        if self.init_is_exposed():
            # NOTE(review): the init label always uses the formal at index 0,
            # independent of the variable being tracked -- confirm intended.
            (fvar, _) = self.astree.get_formal_locindices(0)
            btype = fvar.bctyp
            self._dotgraph.add_node(
                self.nodeprefix + "init",
                labeltxt="{ init | " + str(btype) + " " + fvar.vname + "}",
                shaded=True,
                color="orange",
                recordformat=True)
            self._dotgraph.add_edge(
                self.nodeprefix + "init", self.nodeprefix + self.path[0])

        return self._dotgraph

    def init_is_exposed(self) -> bool:
        """Return True if no block on the path has a defining instruction
        without control flow."""
        for p in self.path:
            instrs = self.rdef_instructions(p)
            if any(not instr.has_control_flow() for instr in instrs):
                return False
        return True

    def is_exposed(self, n: str) -> bool:
        """Return True if no block after n on the path has a defining
        instruction without control flow.

        Raises:
            UF.CHBError: if n is not on the path.
        """
        index = self.pathindex(n)
        for node in self.path[index + 1:]:
            instrs = self.rdef_instructions(node)
            if any(not instr.has_control_flow() for instr in instrs):
                return False
        return True

    def replace_text(self, txt: str) -> str:
        """Apply the configured replacements to txt, longest key first."""
        result = txt
        # Longest keys first, so a key that is a substring of another key
        # does not pre-empt the longer match.
        for src in sorted(self._replacements, key=len, reverse=True):
            result = result.replace(src, self._replacements[src])
        return result

    def get_branch_instruction(self, n: str) -> Optional["Instruction"]:
        """Return the last instruction of CFG block n."""
        src = self.function.cfg.blocks[n]
        instraddr = src.lastaddr
        return self.function.instruction(instraddr)

    def rdef_instructions(self, n: str) -> List["Instruction"]:
        """Return the defining instructions located in block n.

        An address belongs to the block when its numeric value lies between
        the block address and the block's last instruction address
        (inclusive). Block and instruction addresses are parsed as
        hexadecimal; the pseudo-location "init" is skipped.
        """
        block = self.function.blocks[n]
        baddr = int(n, 16)
        xaddr = int(block.lastaddr, 16)
        result: List["Instruction"] = []
        for i in self._rdefinstrs:
            if i == "init":
                continue
            if baddr <= int(i, 16) <= xaddr:
                result.append(block.instructions[i])
        return result

    def add_node(self, n: str) -> None:
        """Add the node for block n to the dot graph.

        A block without defining instructions is labeled with its address
        only. Otherwise the label is a record holding the address, the
        conditions of defining instructions with control flow (if any), and
        the high-level instructions. An exposed block (see is_exposed) is
        filled yellow if any of its defining instructions has control flow,
        orange otherwise.
        """
        nodename = self.nodeprefix + n
        rdefinstrs = self.rdef_instructions(n)
        blocktxt = n
        color: Optional[str] = None
        fillcolor: Optional[str] = None
        if rdefinstrs:
            conditions: List[str] = []
            pinstrs: List[str] = []
            for instr in rdefinstrs:
                (hlinstrs, _) = instr.ast_prov(self.astree)
                pinstrs.extend(str(hlinstr) for hlinstr in hlinstrs)
                if instr.has_control_flow():
                    (cc, _) = instr.ast_cc_condition_prov(self.astree)
                    conditions.append(str(cc))
            if self.is_exposed(n):
                if any(instr.has_control_flow() for instr in rdefinstrs):
                    fillcolor = "yellow"
                else:
                    fillcolor = "orange"
            if conditions:
                blocktxt = (
                    "{" + n + "|" + ("if " + "\\n".join(conditions))
                    + "|" + "\\n".join(pinstrs) + "}")
            else:
                blocktxt = ("{" + n + "|" + "\\n".join(pinstrs) + "}")
        self._dotgraph.add_node(
            str(nodename),
            labeltxt=blocktxt,
            shaded=True,
            color=color,
            fillcolor=fillcolor,
            recordformat=True)

    def add_edge(self, n1: str, n2: str) -> None:
        """Add the edge n1 -> n2 to the dot graph.

        When n1 has exactly two successors and ends in a branch instruction
        with two fall-through/taken conditions, the edge is labeled with the
        branch condition; the condition is requested reversed when n2 is the
        first listed successor of n1.
        """
        nodename1 = self.nodeprefix + n1
        nodename2 = self.nodeprefix + n2
        labeltxt: Optional[str] = None
        tgtedges = self.function.cfg.edges[n1]
        if len(tgtedges) == 2:
            branchinstr = self.get_branch_instruction(n1)
            if branchinstr and branchinstr.is_branch_instruction:
                ftconds = branchinstr.ft_conditions
                if len(ftconds) == 2:
                    # presumably the first successor is the false
                    # (fall-through) target -- TODO confirm
                    astcond = branchinstr.ast_condition_prov(
                        self.astree, reverse=(n2 == tgtedges[0]))
                    labeltxt = str(astcond[0])
        self._dotgraph.add_edge(nodename1, nodename2, labeltxt=labeltxt)

0 commit comments

Comments
 (0)