Skip to content

Commit 00a71a1

Browse files
committed
Python: Port sensitive data modeling
No longer using points-to 🎉
1 parent 3b68c87 commit 00a71a1

File tree

3 files changed

+110
-17
lines changed

3 files changed

+110
-17
lines changed

python/ql/src/semmle/python/dataflow/new/SensitiveDataSources.qll

Lines changed: 86 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ private import semmle.python.dataflow.new.DataFlow
88
// Need to import since frameworks can extend `RemoteFlowSource::Range`
99
private import semmle.python.Frameworks
1010
private import semmle.python.Concepts
11-
private import semmle.python.security.SensitiveData as OldSensitiveData
1211
private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
1312

1413
// We export these explicitly, so we don't also export the `HeuristicNames` module.
@@ -49,17 +48,94 @@ module SensitiveDataSource {
4948
}
5049
}
5150

52-
// TODO: rewrite this to not rely on the old points-to implementation
53-
private class PortOfOldModeling extends SensitiveDataSource::Range {
54-
OldSensitiveData::SensitiveData::Source oldSensitiveSource;
51+
/** Actual sensitive data modeling */
52+
private module SensitiveDataModeling {
53+
private import SensitiveDataHeuristics::HeuristicNames
5554

56-
PortOfOldModeling() { this.asCfgNode() = oldSensitiveSource }
55+
/**
56+
* Gets a reference to a function that is considered to be a sensitive source of
57+
* `classification`.
58+
*/
59+
private DataFlow::LocalSourceNode sensitiveFunction(
60+
DataFlow::TypeTracker t, SensitiveDataClassification classification
61+
) {
62+
t.start() and
63+
exists(Function f |
64+
nameIndicatesSensitiveData(f.getName(), classification) and
65+
result.asExpr() = f.getDefinition()
66+
)
67+
or
68+
exists(DataFlow::TypeTracker t2 | result = sensitiveFunction(t2, classification).track(t2, t))
69+
}
5770

58-
override SensitiveDataClassification getClassification() {
59-
exists(OldSensitiveData::SensitiveData classification |
60-
oldSensitiveSource.isSourceOf(classification)
61-
|
62-
classification = "sensitive.data." + result
71+
/**
72+
* Gets a reference to a function that is considered to be a sensitive source of
73+
* `classification`.
74+
*/
75+
DataFlow::Node sensitiveFunction(SensitiveDataClassification classification) {
76+
sensitiveFunction(DataFlow::TypeTracker::end(), classification).flowsTo(result)
77+
}
78+
79+
/**
80+
* Gets a reference to a string constant that, if used as the key in a lookup,
81+
* indicates the presence of sensitive data with `classification`.
82+
*/
83+
private DataFlow::LocalSourceNode sensitiveLookupStringConst(
84+
DataFlow::TypeTracker t, SensitiveDataClassification classification
85+
) {
86+
t.start() and
87+
nameIndicatesSensitiveData(result.asExpr().(StrConst).getText(), classification)
88+
or
89+
exists(DataFlow::TypeTracker t2 |
90+
result = sensitiveLookupStringConst(t2, classification).track(t2, t)
6391
)
6492
}
93+
94+
/**
95+
* Gets a reference to a string constant that, if used as the key in a lookup,
96+
* indicates the presence of sensitive data with `classification`.
97+
*/
98+
DataFlow::Node sensitiveLookupStringConst(SensitiveDataClassification classification) {
99+
sensitiveLookupStringConst(DataFlow::TypeTracker::end(), classification).flowsTo(result)
100+
}
101+
102+
/** A function call that is considered a source of sensitive data. */
103+
class SensitiveFunctionCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
104+
SensitiveDataClassification classification;
105+
106+
SensitiveFunctionCall() {
107+
this.getFunction() = sensitiveFunction(classification)
108+
or
109+
nameIndicatesSensitiveData(this.getFunction().asCfgNode().(NameNode).getId(), classification)
110+
}
111+
112+
override SensitiveDataClassification getClassification() { result = classification }
113+
}
114+
115+
/** An attribute access that is considered a source of sensitive data. */
116+
class SensitiveAttributeAccess extends SensitiveDataSource::Range {
117+
SensitiveDataClassification classification;
118+
119+
SensitiveAttributeAccess() {
120+
nameIndicatesSensitiveData(this.(DataFlow::AttrRead).getAttributeName(), classification)
121+
or
122+
// I considered excluding any `from ... import something_sensitive`, but then realized that
123+
// we should flag up `from ... import password as ...` as a password
124+
this.(DataFlow::AttrRead).getAttributeNameExpr() = sensitiveLookupStringConst(classification)
125+
}
126+
127+
override SensitiveDataClassification getClassification() { result = classification }
128+
}
129+
130+
/** A call to `get` on an object, where the key indicates the result will be sensitive data. */
131+
class SensitiveGetCall extends SensitiveDataSource::Range, DataFlow::CallCfgNode {
132+
SensitiveDataClassification classification;
133+
134+
SensitiveGetCall() {
135+
this.getFunction().asCfgNode().(AttrNode).getName() = "get" and
136+
this.getArg(0) = sensitiveLookupStringConst(classification)
137+
}
138+
139+
override SensitiveDataClassification getClassification() { result = classification }
140+
}
65141
}

python/ql/test/experimental/dataflow/sensitive-data/TestSensitiveDataSources.ql

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,32 @@ import python
22
import semmle.python.dataflow.new.DataFlow
33
import TestUtilities.InlineExpectationsTest
44
import semmle.python.dataflow.new.SensitiveDataSources
5+
private import semmle.python.ApiGraphs
56

67
class SensitiveDataSourcesTest extends InlineExpectationsTest {
78
SensitiveDataSourcesTest() { this = "SensitiveDataSourcesTest" }
89

9-
override string getARelevantTag() { result = "SensitiveDataSource" }
10+
override string getARelevantTag() { result in ["SensitiveDataSource", "SensitiveUse"] }
1011

1112
override predicate hasActualResult(Location location, string element, string tag, string value) {
1213
exists(location.getFile().getRelativePath()) and
1314
exists(SensitiveDataSource source |
14-
location = source.getLocation() and
15-
element = source.toString() and
16-
value = source.getClassification() and
17-
tag = "SensitiveDataSource"
15+
(
16+
location = source.getLocation() and
17+
element = source.toString() and
18+
value = source.getClassification() and
19+
tag = "SensitiveDataSource"
20+
)
21+
or
22+
exists(DataFlow::Node use |
23+
use = API::builtin("print").getACall().getArg(_) and
24+
DataFlow::localFlow(source, use) and
25+
location = use.getLocation() and
26+
element = use.toString() and
27+
value = source.getClassification() and
28+
tag = "SensitiveUse"
29+
30+
)
1831
)
1932
}
2033
}

python/ql/test/experimental/dataflow/sensitive-data/test.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

2-
from not_found import get_passwd, account_id
2+
from not_found import get_passwd # $ SensitiveDataSource=password
3+
from not_found import account_id # $ SensitiveDataSource=id
34

45
def get_password():
56
pass
@@ -30,7 +31,7 @@ def encrypt_password(pwd):
3031

3132
# plain variables
3233
password = some_function()
33-
print(password) # $ MISSING: SensitiveDataSource=password
34+
print(password) # $ MISSING: SensitiveUse=password
3435

3536
# Special handling of lookups of sensitive properties
3637
request.args["password"], # $ MISSING: SensitiveDataSource=password
@@ -41,3 +42,6 @@ def encrypt_password(pwd):
4142

4243
# I don't think handling `getlist` is super important, just included it to show what we don't handle
4344
request.args.getlist("password")[0] # $ MISSING: SensitiveDataSource=password
45+
46+
from not_found import password2 as foo # $ SensitiveDataSource=password
47+
print(foo) # $ SensitiveUse=password

0 commit comments

Comments
 (0)