Skip to content

Commit 417a799

Browse files
authored
Merge pull request #25 from Microsoft/detect-exit-nodes
Fix exit nodes detection and EOF pruning.
2 parents 5073159 + d9dacc2 commit 417a799

File tree

12 files changed

+133
-41
lines changed

12 files changed

+133
-41
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ coverage.xml
4646
*.cover
4747
.hypothesis/
4848
.pytest_cache/
49+
test-results.xml
4950

5051
# Translations
5152
*.mo

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ This project uses [Semantic Versioning](https://semver.org) starting from versio
99

1010
## [unreleased]
1111

12+
## Fixed
13+
14+
- Fixed exit node detection (#13)
15+
- Fixed eof node pruning
16+
1217
## Changed
1318

1419
- Changed color palette to be more muted, and do not rely on color alone to convey

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,11 @@ There are 2 special nodes:
9898
of `goto` to indicate that the current "subroutine" should terminate, or the whole program should
9999
terminate if the call stack is empty.
100100

101+
The `eof` node is automatically removed if it's a pseudo-node and it's not reached via `call` or `nested`
102+
connections.
103+
104+
The `__begin__` pseudo-node is removed if there is another node starting at line 1.
105+
101106
### Types of connections
102107

103108
* `goto`: if an edge of type `goto` goes from `A` to `B`, it means that in the code within the label `A`

callgraph/core.py

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(self, name):
4040
self.line_number = NO_LINE_NUMBER
4141
self.original_name = name
4242
self.is_exit_node = False
43+
self.is_last_node = False
4344
self.code = []
4445
self.loc = 0
4546

@@ -71,6 +72,7 @@ class CallGraph:
7172
def __init__(self, log_file=sys.stderr):
7273
self.nodes = {}
7374
self.log_file = log_file
75+
self.first_node = None
7476

7577
def GetOrCreateNode(self, name):
7678
if name in self.nodes:
@@ -80,6 +82,45 @@ def GetOrCreateNode(self, name):
8082
self.nodes[name] = node
8183
return node
8284

85+
def _MarkExitNodes(self):
86+
# A node is an exit node if:
87+
# 1. it contains an "exit" command with no target
88+
# or
89+
# 2. it's reached from the starting node via "goto" or "nested" connections
90+
# and it is the last node in the file, or it contains an exit command or a "goto eof" command.
91+
92+
# Identify all nodes with an exit command with no targets.
93+
for node in self.nodes.values():
94+
all_commands = set(itertools.chain.from_iterable(line.commands for line in node.code))
95+
exit_cmd = Command("exit", "")
96+
if exit_cmd in all_commands:
97+
node.is_exit_node = True
98+
99+
# Visit the call graph to find nodes satisfying condition #2.
100+
q = [self.first_node]
101+
visited = set() # Used to avoid loops, since the call graph is not acyclic.
102+
103+
while q:
104+
cur = q.pop()
105+
visited.add(cur.name)
106+
107+
# Evaluate condition for marking exit node.
108+
if cur.is_last_node:
109+
cur.is_exit_node = True
110+
else:
111+
all_commands = itertools.chain.from_iterable(line.commands for line in cur.code)
112+
for command in all_commands:
113+
if command[0] == "exit" or (command[0] == "goto" and command[1] == "eof"):
114+
cur.is_exit_node = True
115+
break
116+
117+
for connection in cur.connections:
118+
if connection.dst not in self.nodes or connection.dst in visited:
119+
continue
120+
if connection.kind == "nested" or connection.kind == "goto":
121+
q.append(self.nodes[connection.dst])
122+
123+
83124
# Adds to each node information depending on the contents of the code, such as connections
84125
# deriving from goto/call commands and whether the node is terminating or not.
85126
def _AnnotateNode(self, node):
@@ -136,38 +177,29 @@ def _AnnotateNode(self, node):
136177

137178
if command == "exit" and target == "":
138179
line.terminating = True
139-
node.is_exit_node = True
140180

141181
@staticmethod
142182
def Build(input_file, log_file=sys.stderr):
143183
call_graph = CallGraph._ParseSource(input_file, log_file)
144184
for node in call_graph.nodes.values():
145185
call_graph._AnnotateNode(node)
146186

147-
# Find exit nodes.
148-
last_node = max(call_graph.nodes.values(), key=lambda x: x.line_number)
149-
print(u"{0} is the last node, marking it as exit node.".format(last_node.name), file=log_file)
150-
last_node.is_exit_node = True
151-
152-
# If the last node's last statement is a goto not going towards eof, then
153-
# it's not an exit node.
154-
for line in reversed(last_node.code):
155-
if line.noop:
156-
continue
157-
158-
for command, target in line.commands:
159-
if command == "goto" and target and target != "eof":
160-
last_node.is_exit_node = False
161-
break
162-
163-
# Prune away EOF if it is a virtual node (no line number) and there are no connections to it.
187+
# Prune away EOF if it is a virtual node (no line number) and there are no call/nested connections to it.
164188
eof = call_graph.GetOrCreateNode("eof")
165-
if eof.line_number == NO_LINE_NUMBER:
166-
all_connections = itertools.chain.from_iterable(n.connections for n in call_graph.nodes.values())
167-
destinations = set(c.dst for c in all_connections)
168-
if "eof" not in destinations:
169-
print(u"Removing the eof node, since there are no connections to it and it's not a real node", file=log_file)
170-
del call_graph.nodes["eof"]
189+
all_connections = itertools.chain.from_iterable(n.connections for n in call_graph.nodes.values())
190+
destinations = set((c.dst, c.kind) for c in all_connections)
191+
if eof.line_number == NO_LINE_NUMBER and ("eof", "call") not in destinations and ("eof", "nested") not in destinations:
192+
print(u"Removing the eof node, since there are no call/nested connections to it and it's not a real node", file=log_file)
193+
del call_graph.nodes["eof"]
194+
for node in call_graph.nodes.values():
195+
eof_connections = [c for c in node.connections if c.dst == "eof"]
196+
print(u"Removing {} eof connections in node {}".format(len(eof_connections), node.name), file=log_file)
197+
for c in eof_connections:
198+
node.connections.remove(c)
199+
200+
# Warn the user if there are goto connections to eof, which will not be executed by CMD.
201+
if eof.line_number != NO_LINE_NUMBER and ("eof", "goto") in destinations:
202+
print(u"WARNING: there are goto connections to eof, but CMD will not execute that code via goto.", file=log_file)
171203

172204
# Find and mark the "nested" connections.
173205
nodes = [n for n in call_graph.nodes.values() if n.line_number != NO_LINE_NUMBER]
@@ -202,6 +234,12 @@ def Build(input_file, log_file=sys.stderr):
202234

203235
break
204236

237+
# Mark all exit nodes.
238+
last_node = max(call_graph.nodes.values(), key=lambda x: x.line_number)
239+
print(u"{0} is the last node, marking it as exit node.".format(last_node.name), file=log_file)
240+
last_node.is_last_node = True
241+
call_graph._MarkExitNodes()
242+
205243
return call_graph
206244

207245
# Creates a call graph from an input file, parsing the file in blocks and creating
@@ -214,6 +252,7 @@ def _ParseSource(input_file, log_file=sys.stderr):
214252
# Special node to signal the start of the script.
215253
cur_node = call_graph.GetOrCreateNode("__begin__")
216254
cur_node.line_number = 1
255+
call_graph.first_node = cur_node
217256

218257
# Special node used by cmd to signal the end of the script.
219258
eof = call_graph.GetOrCreateNode("eof")
@@ -240,9 +279,10 @@ def _ParseSource(input_file, log_file=sys.stderr):
240279
# nodes with the same line number.
241280
if line_number == 1:
242281
del call_graph.nodes["__begin__"]
282+
call_graph.first_node = next_node
243283

244284
cur_node = next_node
245285

246286
cur_node.AddCodeLine(line_number, line)
247287

248-
return call_graph
288+
return call_graph

examples/example1-nodestats.dot

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
digraph g {
2-
"__begin__" [label=<<b>__begin__</b><br/>(line 1)<br/><sub>[3 LOC]</sub>>]
3-
"__begin__" -> "eof" [label=<<b>goto</b><br />(line 3)>,color="#d83b01"]
2+
"__begin__" [color="#e6e6e6",style=filled,label=<<b>__begin__</b><br/>(line 1)<br/><sub>[3 LOC]</sub><br/><sub>[terminating]</sub>>]
43
"__begin__" -> "foo" [label=<<b>call</b><br />(line 2)>,color="#0078d4"]
54
"bar" [label=<<b>bar</b><br/>(line 4)<br/><sub>[4 LOC]</sub>>]
65
"bar" -> "baz" [label=<<b>call</b><br />(line 6)>,color="#0078d4"]
76
"bar" -> "baz" [label=<<b>call</b><br />(line 7)>,color="#0078d4"]
87
"bar" -> "baz" [label=" nested",color="#008575"]
98
"baz" [label=<<b>baz</b><br/>(line 8)<br/><sub>[4 LOC]</sub><br/><sub>[1 external call]</sub>>]
109
"baz" -> "foo" [label=" nested",color="#008575"]
11-
"eof" [color="#e6e6e6",style=filled,label=<<b>eof</b><br/><sub>[0 LOC]</sub><br/><sub>[terminating]</sub>>]
1210
"foo" [label=<<b>foo</b><br/>(line 12)<br/><sub>[3 LOC]</sub>>]
1311
"foo" -> "bar" [label=<<b>goto</b><br />(line 14)>,color="#d83b01"]
1412
}

examples/example1-nodestats.png

-11.1 KB
Loading

examples/example1-noshowall.dot

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
digraph g {
2-
"__begin__" [label=<<b>__begin__</b><br/>(line 1)>]
3-
"__begin__" -> "eof" [label=" goto",color="#d83b01"]
2+
"__begin__" [color="#e6e6e6",style=filled,label=<<b>__begin__</b><br/>(line 1)<br/><sub>[terminating]</sub>>]
43
"__begin__" -> "foo" [label=" call",color="#0078d4"]
54
"bar" [label=<<b>bar</b><br/>(line 4)>]
65
"bar" -> "baz" [label=" call",color="#0078d4"]
76
"bar" -> "baz" [label=" nested",color="#008575"]
87
"baz" [label=<<b>baz</b><br/>(line 8)>]
98
"baz" -> "foo" [label=" nested",color="#008575"]
10-
"eof" [color="#e6e6e6",style=filled,label=<<b>eof</b><br/><sub>[terminating]</sub>>]
119
"foo" [label=<<b>foo</b><br/>(line 12)>]
1210
"foo" -> "bar" [label=" goto",color="#d83b01"]
1311
}

examples/example1-noshowall.png

-9.47 KB
Loading

examples/example1.dot

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
digraph g {
2-
"__begin__" [label=<<b>__begin__</b><br/>(line 1)>]
3-
"__begin__" -> "eof" [label=<<b>goto</b><br />(line 3)>,color="#d83b01"]
2+
"__begin__" [color="#e6e6e6",style=filled,label=<<b>__begin__</b><br/>(line 1)<br/><sub>[terminating]</sub>>]
43
"__begin__" -> "foo" [label=<<b>call</b><br />(line 2)>,color="#0078d4"]
54
"bar" [label=<<b>bar</b><br/>(line 4)>]
65
"bar" -> "baz" [label=<<b>call</b><br />(line 6)>,color="#0078d4"]
76
"bar" -> "baz" [label=<<b>call</b><br />(line 7)>,color="#0078d4"]
87
"bar" -> "baz" [label=" nested",color="#008575"]
98
"baz" [label=<<b>baz</b><br/>(line 8)>]
109
"baz" -> "foo" [label=" nested",color="#008575"]
11-
"eof" [color="#e6e6e6",style=filled,label=<<b>eof</b><br/><sub>[terminating]</sub>>]
1210
"foo" [label=<<b>foo</b><br/>(line 12)>]
1311
"foo" -> "bar" [label=<<b>goto</b><br />(line 14)>,color="#d83b01"]
1412
}

examples/example1.png

-9.14 KB
Loading

0 commit comments

Comments
 (0)