Skip to content

Commit ea0c1d7

Browse files
committed
Python: Better handling of sensitive functions
This solution was the best I could come up with, but it _is_ a bit brittle since you need to remember to add this additional taint step to any configuration that relies on sensitive data sources... I don't see an easy way around this though :|
1 parent f167143 commit ea0c1d7

File tree

4 files changed

+60
-6
lines changed

4 files changed

+60
-6
lines changed

python/ql/src/semmle/python/dataflow/new/SensitiveDataSources.qll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ private module SensitiveDataModeling {
9393
/**
9494
* Gets a reference to a string constant that, if used as the key in a lookup,
9595
* indicates the presence of sensitive data with `classification`.
96+
*
97+
* Also see `extraStepForCalls`.
9698
*/
9799
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
98100
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
@@ -105,12 +107,49 @@ private module SensitiveDataModeling {
105107
SensitiveFunctionCall() {
106108
this.getFunction() = sensitiveFunction(classification)
107109
or
110+
// to cover functions that we don't have the definition for, and where the
111+
// reference to the function has not already been marked as being sensitive
108112
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
109113
}
110114

111115
override SensitiveDataClassification getClassification() { result = classification }
112116
}
113117

118+
/**
119+
* Holds if the step from `nodeFrom` to `nodeTo` should be considered a
120+
* taint-flow step for sensitive-data, to ensure calls are handled correctly.
121+
*
122+
* To handle calls properly, while preserving a good source for path explanations,
123+
* you need to include this predicate as an additional taint step in your taint-tracking
124+
* configurations.
125+
*
126+
* The core problem can be illustrated by the example below. If we consider the
127+
* `print` a sink, what path and what source do we want to show? My initial approach
128+
* would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
129+
* lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
130+
* like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
131+
* with a non-optimal path, which starts at _bad source_, and therefore doesn't show
132+
* how we figured out that `non_sensitive_name`
133+
* could be a function that returns a password (and in cases where there is many calls to
134+
* `my_func` it will be annoying for someone to figure this out manually).
135+
*
136+
* By including this additional taint-step in the taint-tracking configuration, it's possible
137+
* to get a path explanation going from _good source_ to the sink.
138+
*
139+
* ```python
140+
* def my_func(non_sensitive_name):
141+
* x = non_sensitive_name() # <-- bad source
142+
* print(x) # <-- sink
143+
*
144+
* import not_found
145+
* f = not_found.get_passwd # <-- good source
146+
* my_func(f)
147+
* ```
148+
*/
149+
predicate extraStepForCalls(DataFlow::Node nodeFrom, DataFlow::CallCfgNode nodeTo) {
150+
nodeTo.getFunction() = nodeFrom
151+
}
152+
114153
/**
115154
* Any kind of variable assignment (also including with/for) where the name indicates
116155
* it contains sensitive data.
@@ -200,3 +239,5 @@ private module SensitiveDataModeling {
200239
override SensitiveDataClassification getClassification() { result = classification }
201240
}
202241
}
242+
243+
predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling::extraStepForCalls/2;

python/ql/src/semmle/python/security/dataflow/WeakSensitiveDataHashing.qll

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ private import semmle.python.dataflow.new.TaintTracking
1313
private import semmle.python.Concepts
1414
private import semmle.python.dataflow.new.RemoteFlowSources
1515
private import semmle.python.dataflow.new.BarrierGuards
16+
private import semmle.python.dataflow.new.SensitiveDataSources
1617

1718
/**
1819
* Provides a taint-tracking configuration for detecting use of a broken or weak
@@ -38,6 +39,10 @@ module NormalHashFunction {
3839
or
3940
node instanceof Sanitizer
4041
}
42+
43+
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
44+
sensitiveDataExtraStepForCalls(node1, node2)
45+
}
4146
}
4247
}
4348

@@ -70,5 +75,9 @@ module ComputationallyExpensiveHashFunction {
7075
or
7176
node instanceof Sanitizer
7277
}
78+
79+
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
80+
sensitiveDataExtraStepForCalls(node1, node2)
81+
}
7382
}
7483
}

python/ql/test/experimental/dataflow/sensitive-data/TestSensitiveDataSources.ql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ class SensitiveUseConfiguration extends TaintTracking::Configuration {
4040
override predicate isSink(DataFlow::Node node) {
4141
node = API::builtin("print").getACall().getArg(_)
4242
}
43+
44+
override predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) {
45+
sensitiveDataExtraStepForCalls(node1, node2)
46+
}
4347
}
4448
// import DataFlow::PathGraph
4549
// from SensitiveUseConfiguration cfg, DataFlow::PathNode source, DataFlow::PathNode sink

python/ql/test/experimental/dataflow/sensitive-data/test.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,17 @@ def encrypt_password(pwd):
2929
print(x) # $ SensitiveUse=password
3030

3131
f = get_passwd
32-
x = f() # $ MISSING: SensitiveDataSource=password
33-
print(x) # $ MISSING: SensitiveUse=password
32+
x = f()
33+
print(x) # $ SensitiveUse=password
3434

3535
import not_found
3636
f = not_found.get_passwd # $ SensitiveDataSource=password
37-
x = f() # $ MISSING: SensitiveDataSource=password
38-
print(x) # $ MISSING: SensitiveUse=password
37+
x = f()
38+
print(x) # $ SensitiveUse=password
3939

4040
def my_func(non_sensitive_name):
41-
x = non_sensitive_name() # $ MISSING: SensitiveDataSource=password
42-
print(x) # $ MISSING: SensitiveUse=password
41+
x = non_sensitive_name()
42+
print(x) # $ SensitiveUse=password
4343
f = not_found.get_passwd # $ SensitiveDataSource=password
4444
my_func(f)
4545

0 commit comments

Comments
 (0)