Skip to content

Commit a19373a

Browse files
authored
Merge pull request github#5727 from tausbn/python-use-localsource-in-stepsummary
Python: Use `LocalSourceNode` in `StepSummary::step`
2 parents 2054693 + 890f96d commit a19373a

File tree

14 files changed

+68
-142
lines changed

14 files changed

+68
-142
lines changed
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
lgtm,codescanning
2+
* The predicates `StepSummary::step` and `TypeTracker::step` in `TypeTracker.qll` have been changed
3+
to use the more restrictive type `LocalSourceNode` for their second argument. For cases where
4+
stepping between non-`LocalSourceNode`s is required, the `StepSummary::smallstep` predicate may be
5+
used instead.
6+
* The methods `Node::track` and `Node::backtrack` have been moved to the class `LocalSourceNode`. If
7+
the old behavior is required, one can use `LocalSourceNode::flowsTo` to add back the missing flow.

python/ql/src/Security/CVE-2018-1281/BindToAllInterfaces.ql

Lines changed: 2 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -32,21 +32,7 @@ private DataFlow::LocalSourceNode vulnerableHostnameRef(DataFlow::TypeTracker t,
3232
result.asExpr() = allInterfacesStrConst
3333
)
3434
or
35-
// Due to bad performance when using normal setup with `vulnerableHostnameRef(t2, hostname).track(t2, t)`
36-
// we have inlined that code and forced a join
37-
exists(DataFlow::TypeTracker t2 |
38-
exists(DataFlow::StepSummary summary |
39-
vulnerableHostnameRef_first_join(t2, hostname, result, summary) and
40-
t = t2.append(summary)
41-
)
42-
)
43-
}
44-
45-
pragma[nomagic]
46-
private predicate vulnerableHostnameRef_first_join(
47-
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
48-
) {
49-
DataFlow::StepSummary::step(vulnerableHostnameRef(t2, hostname), res, summary)
35+
exists(DataFlow::TypeTracker t2 | result = vulnerableHostnameRef(t2, hostname).track(t2, t))
5036
}
5137

5238
/** Gets a reference to a hostname that can be used to bind to all interfaces. */
@@ -59,21 +45,7 @@ private DataFlow::LocalSourceNode vulnerableAddressTuple(DataFlow::TypeTracker t
5945
t.start() and
6046
result.asExpr() = any(Tuple tup | tup.getElt(0) = vulnerableHostnameRef(hostname).asExpr())
6147
or
62-
// Due to bad performance when using normal setup with `vulnerableAddressTuple(t2, hostname).track(t2, t)`
63-
// we have inlined that code and forced a join
64-
exists(DataFlow::TypeTracker t2 |
65-
exists(DataFlow::StepSummary summary |
66-
vulnerableAddressTuple_first_join(t2, hostname, result, summary) and
67-
t = t2.append(summary)
68-
)
69-
)
70-
}
71-
72-
pragma[nomagic]
73-
private predicate vulnerableAddressTuple_first_join(
74-
DataFlow::TypeTracker t2, string hostname, DataFlow::Node res, DataFlow::StepSummary summary
75-
) {
76-
DataFlow::StepSummary::step(vulnerableAddressTuple(t2, hostname), res, summary)
48+
exists(DataFlow::TypeTracker t2 | result = vulnerableAddressTuple(t2, hostname).track(t2, t))
7749
}
7850

7951
/** Gets a reference to a tuple for which the first element is a hostname that can be used to bind to all interfaces. */

python/ql/src/semmle/python/ApiGraphs.qll

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -422,9 +422,9 @@ module API {
422422
}
423423

424424
/**
425-
* Gets a data-flow node to which `nd`, which is a use of an API-graph node, flows.
425+
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
426426
*
427-
* The flow from `nd` to that node may be inter-procedural.
427+
* The flow from `src` to that node may be inter-procedural.
428428
*/
429429
private DataFlow::LocalSourceNode trackUseNode(
430430
DataFlow::LocalSourceNode src, DataFlow::TypeTracker t
@@ -433,23 +433,19 @@ module API {
433433
use(_, src) and
434434
result = src
435435
or
436-
// Due to bad performance when using `trackUseNode(t2, attr_name).track(t2, t)`
437-
// we have inlined that code and forced a join
438-
exists(DataFlow::StepSummary summary |
439-
t = trackUseNode_first_join(src, result, summary).append(summary)
440-
)
441-
}
442-
443-
pragma[nomagic]
444-
private DataFlow::TypeTracker trackUseNode_first_join(
445-
DataFlow::LocalSourceNode src, DataFlow::LocalSourceNode res, DataFlow::StepSummary summary
446-
) {
447-
DataFlow::StepSummary::step(trackUseNode(src, result), res, summary)
436+
exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
448437
}
449438

439+
/**
440+
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
441+
*
442+
* The flow from `src` to that node may be inter-procedural.
443+
*/
450444
cached
451445
DataFlow::LocalSourceNode trackUseNode(DataFlow::LocalSourceNode src) {
452-
result = trackUseNode(src, DataFlow::TypeTracker::end())
446+
result = trackUseNode(src, DataFlow::TypeTracker::end()) and
447+
// We exclude module variable nodes, as these do not correspond to real uses.
448+
not result instanceof DataFlow::ModuleVariableNode
453449
}
454450

455451
/**

python/ql/src/semmle/python/Concepts.qll

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -570,21 +570,7 @@ module Cryptography {
570570
arg = any(KeyGeneration::Range r).getKeySizeArg() and
571571
result = arg.getALocalSource()
572572
or
573-
// Due to bad performance when using normal setup with we have inlined that code and forced a join
574-
exists(DataFlow::TypeBackTracker t2 |
575-
exists(DataFlow::StepSummary summary |
576-
keysizeBacktracker_first_join(t2, arg, result, summary) and
577-
t = t2.prepend(summary)
578-
)
579-
)
580-
}
581-
582-
pragma[nomagic]
583-
private predicate keysizeBacktracker_first_join(
584-
DataFlow::TypeBackTracker t2, DataFlow::Node arg, DataFlow::Node res,
585-
DataFlow::StepSummary summary
586-
) {
587-
DataFlow::StepSummary::step(res, keysizeBacktracker(t2, arg), summary)
573+
exists(DataFlow::TypeBackTracker t2 | result = keysizeBacktracker(t2, arg).backtrack(t2, t))
588574
}
589575

590576
/** Gets a back-reference to the keysize argument `arg` that was used to generate a new key-pair. */

python/ql/src/semmle/python/dataflow/new/TypeTracker.qll

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ module StepSummary {
5151
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
5252
*/
5353
cached
54-
predicate step(LocalSourceNode nodeFrom, Node nodeTo, StepSummary summary) {
55-
exists(Node mid | typePreservingStep*(nodeFrom, mid) and smallstep(mid, nodeTo, summary))
54+
predicate step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo, StepSummary summary) {
55+
exists(Node mid | nodeFrom.flowsTo(mid) and smallstep(mid, nodeTo, summary))
5656
}
5757

5858
/**
@@ -63,7 +63,7 @@ module StepSummary {
6363
* type-preserving steps.
6464
*/
6565
predicate smallstep(Node nodeFrom, Node nodeTo, StepSummary summary) {
66-
typePreservingStep(nodeFrom, nodeTo) and
66+
jumpStep(nodeFrom, nodeTo) and
6767
summary = LevelStep()
6868
or
6969
callStep(nodeFrom, nodeTo) and summary = CallStep()
@@ -80,12 +80,6 @@ module StepSummary {
8080
}
8181
}
8282

83-
/** Holds if it's reasonable to expect the data flow step from `nodeFrom` to `nodeTo` to preserve types. */
84-
private predicate typePreservingStep(Node nodeFrom, Node nodeTo) {
85-
simpleLocalFlowStep(nodeFrom, nodeTo) or
86-
jumpStep(nodeFrom, nodeTo)
87-
}
88-
8983
/**
9084
* Gets a callable for the call where `nodeFrom` is used as the `i`'th argument.
9185
*
@@ -274,10 +268,10 @@ class TypeTracker extends TTypeTracker {
274268
* heap and/or inter-procedural step from `nodeFrom` to `nodeTo`.
275269
*/
276270
pragma[inline]
277-
TypeTracker step(LocalSourceNode nodeFrom, Node nodeTo) {
271+
TypeTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
278272
exists(StepSummary summary |
279-
StepSummary::step(nodeFrom, nodeTo, summary) and
280-
result = this.append(summary)
273+
StepSummary::step(nodeFrom, pragma[only_bind_out](nodeTo), pragma[only_bind_into](summary)) and
274+
result = this.append(pragma[only_bind_into](summary))
281275
)
282276
}
283277

@@ -312,7 +306,7 @@ class TypeTracker extends TTypeTracker {
312306
result = this.append(summary)
313307
)
314308
or
315-
typePreservingStep(nodeFrom, nodeTo) and
309+
simpleLocalFlowStep(nodeFrom, nodeTo) and
316310
result = this
317311
}
318312
}
@@ -417,8 +411,8 @@ class TypeBackTracker extends TTypeBackTracker {
417411
pragma[inline]
418412
TypeBackTracker step(LocalSourceNode nodeFrom, LocalSourceNode nodeTo) {
419413
exists(StepSummary summary |
420-
StepSummary::step(nodeFrom, nodeTo, summary) and
421-
this = result.prepend(summary)
414+
StepSummary::step(pragma[only_bind_out](nodeFrom), nodeTo, pragma[only_bind_into](summary)) and
415+
this = result.prepend(pragma[only_bind_into](summary))
422416
)
423417
}
424418

@@ -453,7 +447,7 @@ class TypeBackTracker extends TTypeBackTracker {
453447
this = result.prepend(summary)
454448
)
455449
or
456-
typePreservingStep(nodeFrom, nodeTo) and
450+
simpleLocalFlowStep(nodeFrom, nodeTo) and
457451
this = result
458452
}
459453
}

python/ql/src/semmle/python/dataflow/new/internal/DataFlowPublic.qll

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -119,22 +119,6 @@ class Node extends TNode {
119119
/** Gets the expression corresponding to this node, if any. */
120120
Expr asExpr() { none() }
121121

122-
/**
123-
* Gets a node that this node may flow to using one heap and/or interprocedural step.
124-
*
125-
* See `TypeTracker` for more details about how to use this.
126-
*/
127-
pragma[inline]
128-
Node track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
129-
130-
/**
131-
* Gets a node that may flow into this one using one heap and/or interprocedural step.
132-
*
133-
* See `TypeBackTracker` for more details about how to use this.
134-
*/
135-
pragma[inline]
136-
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
137-
138122
/**
139123
* Gets a local source node from which data may flow to this node in zero or more local data-flow steps.
140124
*/

python/ql/src/semmle/python/dataflow/new/internal/LocalSources.qll

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,6 @@ import python
1010
import DataFlowPublic
1111
private import DataFlowPrivate
1212

13-
private predicate comes_from_cfgnode(Node node) {
14-
exists(CfgNode first, Node second |
15-
simpleLocalFlowStep(first, second) and
16-
simpleLocalFlowStep*(second, node)
17-
)
18-
}
19-
2013
/**
2114
* A data flow node that is a source of local flow. This includes things like
2215
* - Expressions
@@ -40,8 +33,7 @@ private predicate comes_from_cfgnode(Node node) {
4033
class LocalSourceNode extends Node {
4134
cached
4235
LocalSourceNode() {
43-
not comes_from_cfgnode(this) and
44-
not this instanceof ModuleVariableNode and
36+
not simpleLocalFlowStep(_, this) and
4537
// Currently, we create synthetic post-update nodes for
4638
// - arguments to calls that may modify said argument
4739
// - direct reads a writes of object attributes
@@ -85,6 +77,22 @@ class LocalSourceNode extends Node {
8577
* Gets a call to this node.
8678
*/
8779
CallCfgNode getACall() { Cached::call(this, result) }
80+
81+
/**
82+
* Gets a node that this node may flow to using one heap and/or interprocedural step.
83+
*
84+
* See `TypeTracker` for more details about how to use this.
85+
*/
86+
pragma[inline]
87+
LocalSourceNode track(TypeTracker t2, TypeTracker t) { t = t2.step(this, result) }
88+
89+
/**
90+
* Gets a node that may flow into this one using one heap and/or interprocedural step.
91+
*
92+
* See `TypeBackTracker` for more details about how to use this.
93+
*/
94+
pragma[inline]
95+
LocalSourceNode backtrack(TypeBackTracker t2, TypeBackTracker t) { t2 = t.step(result, this) }
8896
}
8997

9098
cached

python/ql/src/semmle/python/frameworks/Cryptography.qll

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -83,23 +83,11 @@ private module CryptographyModel {
8383
result.(DataFlow::CallCfgNode).getFunction() = curveClassWithKeySize(keySize) and
8484
origin = result
8585
or
86-
// Due to bad performance when using normal setup with we have inlined that code and forced a join
8786
exists(DataFlow::TypeTracker t2 |
88-
exists(DataFlow::StepSummary summary |
89-
curveClassInstanceWithKeySize_first_join(t2, keySize, origin, result, summary) and
90-
t = t2.append(summary)
91-
)
87+
result = curveClassInstanceWithKeySize(t2, keySize, origin).track(t2, t)
9288
)
9389
}
9490

95-
pragma[nomagic]
96-
private predicate curveClassInstanceWithKeySize_first_join(
97-
DataFlow::TypeTracker t2, int keySize, DataFlow::Node origin, DataFlow::Node res,
98-
DataFlow::StepSummary summary
99-
) {
100-
DataFlow::StepSummary::step(curveClassInstanceWithKeySize(t2, keySize, origin), res, summary)
101-
}
102-
10391
/** Gets a reference to a predefined curve class instance with a specific key size (in bits), as well as the origin of the class. */
10492
DataFlow::Node curveClassInstanceWithKeySize(int keySize, DataFlow::Node origin) {
10593
curveClassInstanceWithKeySize(DataFlow::TypeTracker::end(), keySize, origin).flowsTo(result)

python/ql/src/semmle/python/frameworks/Django.qll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,7 +1303,7 @@ private module PrivateDjango {
13031303
}
13041304

13051305
/** Gets a reference to the `django.http.response.HttpResponse.write` function. */
1306-
private DataFlow::Node write(
1306+
private DataFlow::LocalSourceNode write(
13071307
django::http::response::HttpResponse::InstanceSource instance, DataFlow::TypeTracker t
13081308
) {
13091309
t.startInAttr("write") and
@@ -1315,7 +1315,7 @@ private module PrivateDjango {
13151315

13161316
/** Gets a reference to the `django.http.response.HttpResponse.write` function. */
13171317
DataFlow::Node write(django::http::response::HttpResponse::InstanceSource instance) {
1318-
result = write(instance, DataFlow::TypeTracker::end())
1318+
write(instance, DataFlow::TypeTracker::end()).flowsTo(result)
13191319
}
13201320

13211321
/**

python/ql/src/semmle/python/frameworks/PEP249.qll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,15 +54,15 @@ module Connection {
5454
}
5555

5656
/** Gets a reference to an instance of `db.Connection`. */
57-
private DataFlow::Node instance(DataFlow::TypeTracker t) {
57+
private DataFlow::LocalSourceNode instance(DataFlow::TypeTracker t) {
5858
t.start() and
5959
result instanceof InstanceSource
6060
or
6161
exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t))
6262
}
6363

6464
/** Gets a reference to an instance of `db.Connection`. */
65-
DataFlow::Node instance() { result = instance(DataFlow::TypeTracker::end()) }
65+
DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) }
6666
}
6767

6868
/**
@@ -71,26 +71,26 @@ module Connection {
7171
*/
7272
module cursor {
7373
/** Gets a reference to the `cursor` method on a connection. */
74-
private DataFlow::Node methodRef(DataFlow::TypeTracker t) {
74+
private DataFlow::LocalSourceNode methodRef(DataFlow::TypeTracker t) {
7575
t.startInAttr("cursor") and
7676
result = Connection::instance()
7777
or
7878
exists(DataFlow::TypeTracker t2 | result = methodRef(t2).track(t2, t))
7979
}
8080

8181
/** Gets a reference to the `cursor` method on a connection. */
82-
DataFlow::Node methodRef() { result = methodRef(DataFlow::TypeTracker::end()) }
82+
DataFlow::Node methodRef() { methodRef(DataFlow::TypeTracker::end()).flowsTo(result) }
8383

8484
/** Gets a reference to a result of calling the `cursor` method on a connection. */
85-
private DataFlow::Node methodResult(DataFlow::TypeTracker t) {
85+
private DataFlow::LocalSourceNode methodResult(DataFlow::TypeTracker t) {
8686
t.start() and
8787
result.asCfgNode().(CallNode).getFunction() = methodRef().asCfgNode()
8888
or
8989
exists(DataFlow::TypeTracker t2 | result = methodResult(t2).track(t2, t))
9090
}
9191

9292
/** Gets a reference to a result of calling the `cursor` method on a connection. */
93-
DataFlow::Node methodResult() { result = methodResult(DataFlow::TypeTracker::end()) }
93+
DataFlow::Node methodResult() { methodResult(DataFlow::TypeTracker::end()).flowsTo(result) }
9494
}
9595

9696
/**
@@ -101,7 +101,7 @@ module cursor {
101101
*
102102
* See https://www.python.org/dev/peps/pep-0249/#id15.
103103
*/
104-
private DataFlow::Node execute(DataFlow::TypeTracker t) {
104+
private DataFlow::LocalSourceNode execute(DataFlow::TypeTracker t) {
105105
t.startInAttr("execute") and
106106
result in [cursor::methodResult(), Connection::instance()]
107107
or
@@ -116,7 +116,7 @@ private DataFlow::Node execute(DataFlow::TypeTracker t) {
116116
*
117117
* See https://www.python.org/dev/peps/pep-0249/#id15.
118118
*/
119-
DataFlow::Node execute() { result = execute(DataFlow::TypeTracker::end()) }
119+
DataFlow::Node execute() { execute(DataFlow::TypeTracker::end()).flowsTo(result) }
120120

121121
/** A call to the `execute` method on a cursor (or on a connection). */
122122
private class ExecuteCall extends SqlExecution::Range, DataFlow::CallCfgNode {

0 commit comments

Comments
 (0)