Skip to content

Commit a55b43b

Browse files
authored
Python: Use LocalSourceNode throughout step
This commit does a lot of stuff all at once, so here are the main highlights: In `TypeTracker.qll`, we change `StepSummary::step` to step only between source nodes. Because reads and writes of global variables happen in two different (jump) steps, this requires the intermediate `ModuleVariableNode` to _also_ be a `LocalSourceNode`, and we therefore modify the charpred for that class accordingly. (This also means changing a few of the tests to account for these new source nodes.) In addition, we change `TypeTracker::step` to likewise step between local source nodes. Next, to enable the use of the `track` convenience method on nodes, we add some pragmas to `TypeTracker::step` that prevent bad joins from occurring. With this, we can eliminate all of the manual type tracker join predicates. Next, we observe that because `StepSummary::step` now uses `flowsTo`, it automatically encapsulates all local-flow steps. In particular this means we do not have to use `typePreservingStep` in `smallstep`, but can use `jumpStep` directly. A similar observation applies to `TypeTracker::smallstep`. Having done this, we no longer need `typePreservingStep`, so we get rid of it.
1 parent 31bd701 commit a55b43b

File tree

9 files changed

+20
-101
lines changed

9 files changed

+20
-101
lines changed

python/ql/src/Security/CVE-2018-1281/BindToAllInterfaces.ql

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,7 @@ private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t,
3232
result.asExpr() = allInterfacesStrConst
3333
)
3434
or
35-
// Due to bad performance when using normal setup with `vulnerableHostnameRef(t2, hostname).track(t2, t)`
36-
// we have inlined that code and forced a join
37-
exists(DataFlow::TypeTracker t2 |
38-
exists(DataFlow::StepSummary summary |
39-
vulnerableHostnameRef_first_join(t2, hostname, result, summary) and
40-
t = t2.append(summary)
41-
)
42-
)
43-
}
44-
45-
pragma[nomagic]
46-
private predicate vulnerableHostnameRef_first_join(
47-
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
48-
) {
49-
DataFlow::StepSummary::step(vulnerableHostnameRef(t2, hostname), res, summary)
35+
exists(DataFlow::TypeTracker t2 | result = vulnerableHostnameRef(t2, hostname).track(t2, t))
5036
}
5137

5238
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
@@ -59,21 +45,7 @@ private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t
5945
t.start() and
6046
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
6147
or
62-
// Due to bad performance when using normal setup with `vulnerableAddressTuple(t2, hostname).track(t2, t)`
63-
// we have inlined that code and forced a join
64-
exists(DataFlow::TypeTracker t2 |
65-
exists(DataFlow::StepSummary summary |
66-
vulnerableAddressTuple_first_join(t2, hostname, result, summary) and
67-
t = t2.append(summary)
68-
)
69-
)
70-
}
71-
72-
pragma[nomagic]
73-
private predicate vulnerableAddressTuple_first_join(
74-
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
75-
) {
76-
DataFlow::StepSummary::step(vulnerableAddressTuple(t2, hostname), res, summary)
48+
exists(DataFlow::TypeTracker t2 | result = vulnerableAddressTuple(t2, hostname).track(t2, t))
7749
}
7850

7951
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */

python/ql/src/semmle/python/ApiGraphs.qll

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -433,18 +433,7 @@ module API {
433433
use(_, src) and
434434
result = src
435435
or
436-
// Due to bad performance when using `trackUseNode(t2, attr_name).track(t2, t)`
437-
// we have inlined that code and forced a join
438-
exists(DataFlow::StepSummary summary |
439-
t = trackUseNode_first_join(src, result, summary).append(summary)
440-
)
441-
}
442-
443-
pragma[nomagic]
444-
private DataFlow::TypeTracker trackUseNode_first_join(
445-
DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode res, DataFlow::StepSummary summary
446-
) {
447-
DataFlow::StepSummary::step(trackUseNode(src, result), res, summary)
436+
exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
448437
}
449438

450439
cached

python/ql/src/semmle/python/Concepts.qll

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -570,21 +570,7 @@ module Cryptography {
570570
arg = any(KeyGeneration::Range r).getKeySizeArg() and
571571
result = arg.getALocalSource()
572572
or
573-
// Due to bad performance when using normal setup with we have inlined that code and forced a join
574-
exists(DataFlow::TypeBackTracker t2 |
575-
exists(DataFlow::StepSummary summary |
576-
keysizeBacktracker_first_join(t2, arg, result, summary) and
577-
t = t2.prepend(summary)
578-
)
579-
)
580-
}
581-
582-
pragma[nomagic]
583-
private predicate keysizeBacktracker_first_join(
584-
DataFlow::TypeBackTracker t2, DataFlow::Node arg, DataFlow::Node res,
585-
DataFlow::StepSummary summary
586-
) {
587-
DataFlow::StepSummary::step(res, keysizeBacktracker(t2, arg), summary)
573+
exists(DataFlow::TypeBackTracker t2 | result = keysizeBacktracker(t2, arg).backtrack(t2, t))
588574
}
589575

590576
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */

python/ql/src/semmle/python/dataflow/new/TypeTracker.qll

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ module StepSummary {
5151
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
5252
*/
5353
cached
54-
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
55-
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
54+
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
55+
exists(Node mid | nodeFrom.flowsTo(mid) and smallstep(mid, nodeTo, summary))
5656
}
5757

5858
/**
@@ -63,7 +63,7 @@ module StepSummary {
6363
* type-preserving steps.
6464
*/
6565
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
66-
typePreservingStep(nodeFrom, nodeTo) and
66+
jumpStep(nodeFrom, nodeTo) and
6767
summary = LevelStep()
6868
or
6969
callStep(nodeFrom, nodeTo) and summary = CallStep()
@@ -80,12 +80,6 @@ module StepSummary {
8080
}
8181
}
8282

83-
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
84-
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
85-
simpleLocalFlowStep(nodeFrom, nodeTo) or
86-
jumpStep(nodeFrom, nodeTo)
87-
}
88-
8983
/**
9084
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
9185
*
@@ -274,10 +268,10 @@ class TypeTracker extends TTypeTracker {
274268
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
275269
*/
276270
pragma[inline]
277-
TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
271+
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
278272
exists(StepSummary summary |
279-
StepSummary::step(nodeFrom, nodeTo, summary) and
280-
result = this.append(summary)
273+
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
274+
result = this.append(pragma[only_bind_into](summary))
281275
)
282276
}
283277

@@ -312,7 +306,7 @@ class TypeTracker extends TTypeTracker {
312306
result = this.append(summary)
313307
)
314308
or
315-
typePreservingStep(nodeFrom, nodeTo) and
309+
simpleLocalFlowStep(nodeFrom, nodeTo) and
316310
result = this
317311
}
318312
}
@@ -453,7 +447,7 @@ class TypeBackTracker extends TTypeBackTracker {
453447
this = result.prepend(summary)
454448
)
455449
or
456-
typePreservingStep(nodeFrom, nodeTo) and
450+
simpleLocalFlowStep(nodeFrom, nodeTo) and
457451
this = result
458452
}
459453
}

python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ class LocalSourceNode extends Node {
4141
cached
4242
LocalSourceNode() {
4343
not comes_from_cfgnode(this) and
44-
not this instanceof ModuleVariableNode and
4544
// Currently, we create synthetic post-update nodes for
4645
// - arguments to calls that may modify said argument
4746
// - direct reads a writes of object attributes

python/ql/src/semmle/python/frameworks/Cryptography.qll

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -83,23 +83,11 @@ private module CryptographyModel {
8383
result.(DataFlow::CallCfgNode).getFunction() = curveClassWithKeySize(keySize) and
8484
origin = result
8585
or
86-
// Due to bad performance when using normal setup with we have inlined that code and forced a join
8786
exists(DataFlow::TypeTracker t2 |
88-
exists(DataFlow::StepSummary summary |
89-
curveClassInstanceWithKeySize_first_join(t2, keySize, origin, result, summary) and
90-
t = t2.append(summary)
91-
)
87+
result = curveClassInstanceWithKeySize(t2, keySize, origin).track(t2, t)
9288
)
9389
}
9490

95-
pragma[nomagic]
96-
private predicate curveClassInstanceWithKeySize_first_join(
97-
DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res,
98-
DataFlow::StepSummary summary
99-
) {
100-
DataFlow::StepSummary::step(curveClassInstanceWithKeySize(t2, keySize, origin), res, summary)
101-
}
102-
10391
/** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */
10492
DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) {
10593
curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin).flowsTo(result)

python/ql/src/semmle/python/regex.qll

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -82,21 +82,7 @@ private DataFlow::LocalSourceNode re_flag_tracker(string flag_name, DataFlow::Ty
8282
result.asCfgNode() = binop
8383
)
8484
or
85-
// Due to bad performance when using normal setup with `re_flag_tracker(t2, attr_name).track(t2, t)`
86-
// we have inlined that code and forced a join
87-
exists(DataFlow::TypeTracker t2 |
88-
exists(DataFlow::StepSummary summary |
89-
re_flag_tracker_first_join(t2, flag_name, result, summary) and
90-
t = t2.append(summary)
91-
)
92-
)
93-
}
94-
95-
pragma[nomagic]
96-
private predicate re_flag_tracker_first_join(
97-
DataFlow::TypeTracker t2, string flag_name, DataFlow::Node res, DataFlow::StepSummary summary
98-
) {
99-
DataFlow::StepSummary::step(re_flag_tracker(flag_name, t2), res, summary)
85+
exists(DataFlow::TypeTracker t2 | result = re_flag_tracker(flag_name, t2).track(t2, t))
10086
}
10187

10288
/**

python/ql/test/experimental/dataflow/ApiGraphs/use.ql

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ class ApiUseTest extends InlineExpectationsTest {
99
override string getARelevantTag() { result = "use" }
1010

1111
private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) {
12-
n = a.getAUse() and l = n.getLocation()
12+
n = a.getAUse() and
13+
l = n.getLocation() and
14+
// Module variable nodes have no suitable location, so it's best to simply exclude them entirely
15+
// from the inline tests.
16+
not n instanceof DataFlow::ModuleVariableNode
1317
}
1418

1519
override predicate hasActualResult(Location location, string element, string tag, string value) {

python/ql/test/experimental/dataflow/typetracking/moduleattr.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
module_tracker
22
| import_as_attr.py:1:6:1:11 | ControlFlowNode for ImportExpr |
33
module_attr_tracker
4+
| import_as_attr.py:0:0:0:0 | ModuleVariableNode for Global Variable attr_ref in Module import_as_attr |
45
| import_as_attr.py:1:20:1:35 | ControlFlowNode for ImportMember |
56
| import_as_attr.py:1:28:1:35 | GSSA Variable attr_ref |
67
| import_as_attr.py:3:1:3:1 | GSSA Variable x |

0 commit comments

Comments
 (0)