Skip to content

Commit 1a57f81

Browse files
authored
Merge pull request github#12537 from yoff/python/captured-variables-for-typetracking
Python: Captured variables for type tracking and the API graph
2 parents 4457126 + 42090b5 commit 1a57f81

22 files changed

+299
-48
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Type tracking is now aware of reads of captured variables (variables defined in an outer scope). This leads to a richer API graph, and may lead to more results in some queries.

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ module API {
987987
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
988988
Stages::TypeTracking::ref() and
989989
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
990-
result instanceof DataFlow::ExprNode
990+
result instanceof DataFlow::LocalSourceNodeNotModuleVariableNode
991991
}
992992

993993
/**

python/ql/lib/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ class LocalSourceNode extends Node {
5151
// We explicitly include any read of a global variable, as some of these may have local flow going
5252
// into them.
5353
this = any(ModuleVariableNode mvn).getARead()
54+
or
55+
// We include all scope entry definitions, as these act as the local source within the scope they
56+
// enter.
57+
this.asVar() instanceof ScopeEntryDefinition
5458
}
5559

5660
/** Holds if this `LocalSourceNode` can flow to `nodeTo` in one or more local flow steps. */
@@ -133,6 +137,21 @@ class LocalSourceNode extends Node {
133137
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
134138
}
135139

140+
/**
141+
* A `LocalSourceNode` that is not a `ModuleVariableNode`.
142+
* This class states that condition positively in its characteristic predicate, rather than by negation.
143+
*
144+
* Also known as `FutureLocalSourceNode` (see `FutureWork` below); this class serves as a stand-in until then.
145+
*/
146+
class LocalSourceNodeNotModuleVariableNode extends LocalSourceNode {
147+
cached
148+
LocalSourceNodeNotModuleVariableNode() {
149+
this instanceof ExprNode
150+
or
151+
this.asVar() instanceof ScopeEntryDefinition
152+
}
153+
}
154+
136155
/**
137156
* A node that can be used for type tracking or type back-tracking.
138157
*

python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackerSpecific.qll

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,19 @@ predicate compatibleContents(TypeTrackerContent storeContent, TypeTrackerContent
4343

4444
predicate simpleLocalFlowStep = DataFlowPrivate::simpleLocalFlowStepForTypetracking/2;
4545

46-
predicate jumpStep = DataFlowPrivate::jumpStepSharedWithTypeTracker/2;
46+
predicate jumpStep(Node nodeFrom, Node nodeTo) {
47+
DataFlowPrivate::jumpStepSharedWithTypeTracker(nodeFrom, nodeTo)
48+
or
49+
capturedJumpStep(nodeFrom, nodeTo)
50+
}
51+
52+
predicate capturedJumpStep(Node nodeFrom, Node nodeTo) {
53+
exists(SsaSourceVariable var, DefinitionNode def | var.hasDefiningNode(def) |
54+
nodeTo.asVar().(ScopeEntryDefinition).getSourceVariable() = var and
55+
nodeFrom.asCfgNode() = def.getValue() and
56+
var.getScope().getScope*() = nodeFrom.getScope()
57+
)
58+
}
4759

4860
/** Holds if there is a level step from `nodeFrom` to `nodeTo`, which may depend on the call graph. */
4961
predicate levelStepCall(Node nodeFrom, Node nodeTo) { none() }

python/ql/test/experimental/dataflow/coverage/test.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -726,15 +726,15 @@ def f6(arg):
726726
return f5(arg)
727727

728728
x = f6(SOURCE)
729-
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
729+
SINK(x) #$ flow="SOURCE, l:-1 -> x"
730730
x = f5(SOURCE)
731-
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
731+
SINK(x) #$ flow="SOURCE, l:-1 -> x"
732732
x = f4(SOURCE)
733-
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
733+
SINK(x) #$ flow="SOURCE, l:-1 -> x"
734734
x = f3(SOURCE)
735-
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
735+
SINK(x) #$ flow="SOURCE, l:-1 -> x"
736736
x = f2(SOURCE)
737-
SINK(x) #$ MISSING:flow="SOURCE, l:-1 -> x"
737+
SINK(x) #$ flow="SOURCE, l:-1 -> x"
738738
x = f1(SOURCE)
739739
SINK(x) #$ flow="SOURCE, l:-1 -> x"
740740

python/ql/test/experimental/dataflow/typetracking/moduleattr.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ module_attr_tracker
66
| import_as_attr.py:1:28:1:35 | GSSA Variable attr_ref |
77
| import_as_attr.py:3:1:3:1 | GSSA Variable x |
88
| import_as_attr.py:3:5:3:12 | ControlFlowNode for attr_ref |
9+
| import_as_attr.py:5:1:5:10 | GSSA Variable attr_ref |
910
| import_as_attr.py:6:5:6:5 | SSA variable y |
1011
| import_as_attr.py:6:9:6:16 | ControlFlowNode for attr_ref |

python/ql/test/experimental/dataflow/typetracking/test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,10 @@ def test_import():
6060
def to_inner_scope():
6161
x = tracked # $tracked
6262
def foo():
63-
y = x # $ MISSING: tracked
64-
return y # $ MISSING: tracked
65-
also_x = foo() # $ MISSING: tracked
66-
print(also_x) # $ MISSING: tracked
63+
y = x # $ tracked
64+
return y # $ tracked
65+
also_x = foo() # $ tracked
66+
print(also_x) # $ tracked
6767

6868
# ------------------------------------------------------------------------------
6969
# Function decorator

python/ql/test/experimental/dataflow/typetracking/tracked.ql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ class TrackedTest extends InlineExpectationsTest {
2424
tracked(t).flowsTo(e) and
2525
// Module variables have no sensible location, and hence can't be annotated.
2626
not e instanceof DataFlow::ModuleVariableNode and
27+
// Global variables located on line 0 cannot be annotated either.
28+
not e.getLocation().getStartLine() = 0 and
29+
// We do not wish to annotate scope entry definitions,
30+
// as they do not appear in the source code.
31+
not e.asVar() instanceof ScopeEntryDefinition and
2732
tag = "tracked" and
2833
location = e.getLocation() and
2934
value = t.getAttr() and

python/ql/test/experimental/dataflow/validTest.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,10 @@ def check_tests_valid_after_version(testFile, version):
6868
check_tests_valid("coverage-py3.classes")
6969
check_tests_valid("variable-capture.in")
7070
check_tests_valid("variable-capture.nonlocal")
71+
check_tests_valid("variable-capture.global")
7172
check_tests_valid("variable-capture.dict")
72-
check_tests_valid("variable-capture.collections")
73+
check_tests_valid("variable-capture.test_collections")
74+
check_tests_valid("variable-capture.by_value")
7375
check_tests_valid("module-initialization.multiphase")
7476
check_tests_valid("fieldflow.test")
7577
check_tests_valid("fieldflow.test_dict")
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Here we test capturing the _value_ of a variable (by using it as the default value for a parameter)
2+
# All functions starting with "test_" should run and execute `print("OK")` exactly once.
3+
# This can be checked by running validTest.py.
4+
5+
import sys
6+
import os
7+
8+
sys.path.append(os.path.dirname(os.path.dirname((__file__))))
9+
from testlib import expects
10+
11+
# These are defined so that we can evaluate the test code.
12+
NONSOURCE = "not a source"
13+
SOURCE = "source"
14+
15+
def is_source(x):
16+
return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j
17+
18+
19+
def SINK(x):
20+
if is_source(x):
21+
print("OK")
22+
else:
23+
print("Unexpected flow", x)
24+
25+
26+
def SINK_F(x):
27+
if is_source(x):
28+
print("Unexpected flow", x)
29+
else:
30+
print("OK")
31+
32+
33+
def by_value1():
34+
a = SOURCE
35+
def inner(a_val=a):
36+
SINK(a_val) #$ captured
37+
SINK_F(a)
38+
a = NONSOURCE
39+
inner()
40+
41+
def by_value2():
42+
a = NONSOURCE
43+
def inner(a_val=a):
44+
SINK(a) #$ MISSING:captured
45+
SINK_F(a_val)
46+
a = SOURCE
47+
inner()
48+
49+
@expects(4)
50+
def test_by_value():
51+
by_value1()
52+
by_value2()

0 commit comments

Comments
 (0)