5
5
6
6
private import python
7
7
private import semmle.python.dataflow.new.DataFlow
8
- // Need to import since frameworks can extend `RemoteFlowSource ::Range`
8
+ // Need to import `semmle.python.Frameworks` since frameworks can extend `SensitiveDataSource::Range`
9
9
private import semmle.python.Frameworks
10
- private import semmle.python.Concepts
11
- private import semmle.python.security.SensitiveData as OldSensitiveData
10
+ private import semmle.python.security.internal.SensitiveDataHeuristics as SensitiveDataHeuristics
11
+
12
+ // We export these explicitly, so we don't also export the `HeuristicNames` module.
13
+ class SensitiveDataClassification = SensitiveDataHeuristics:: SensitiveDataClassification ;
14
+
15
+ module SensitiveDataClassification = SensitiveDataHeuristics:: SensitiveDataClassification;
12
16
13
17
/**
14
18
* A data flow source of sensitive data, such as secrets, certificates, or passwords.
@@ -22,13 +26,9 @@ class SensitiveDataSource extends DataFlow::Node {
22
26
SensitiveDataSource ( ) { this = range }
23
27
24
28
/**
25
- * INTERNAL: Do not use.
26
- *
27
- * This will be rewritten to have better types soon, and therefore should only be used internally until then.
28
- *
29
29
* Gets the classification of the sensitive data.
30
30
*/
31
- string getClassification ( ) { result = range .getClassification ( ) }
31
+ SensitiveDataClassification getClassification ( ) { result = range .getClassification ( ) }
32
32
}
33
33
34
34
/** Provides a class for modeling new sources of sensitive data, such as secrets, certificates, or passwords. */
@@ -41,26 +41,225 @@ module SensitiveDataSource {
41
41
*/
42
42
abstract class Range extends DataFlow:: Node {
43
43
/**
44
- * INTERNAL: Do not use.
45
- *
46
- * This will be rewritten to have better types soon, and therefore should only be used internally until then.
47
- *
48
44
* Gets the classification of the sensitive data.
49
45
*/
50
- abstract string getClassification ( ) ;
46
+ abstract SensitiveDataClassification getClassification ( ) ;
51
47
}
52
48
}
53
49
54
- private class PortOfOldModeling extends SensitiveDataSource:: Range {
55
- OldSensitiveData:: SensitiveData:: Source oldSensitiveSource ;
50
+ /** Actual sensitive data modeling */
51
+ private module SensitiveDataModeling {
52
+ private import SensitiveDataHeuristics:: HeuristicNames
56
53
57
- PortOfOldModeling ( ) { this .asCfgNode ( ) = oldSensitiveSource }
54
+ /**
55
+ * Gets a reference to a function that is considered to be a sensitive source of
56
+ * `classification`.
57
+ */
58
+ private DataFlow:: LocalSourceNode sensitiveFunction (
59
+ DataFlow:: TypeTracker t , SensitiveDataClassification classification
60
+ ) {
61
+ t .start ( ) and
62
+ exists ( Function f |
63
+ nameIndicatesSensitiveData ( f .getName ( ) , classification ) and
64
+ result .asExpr ( ) = f .getDefinition ( )
65
+ )
66
+ or
67
+ exists ( DataFlow:: TypeTracker t2 | result = sensitiveFunction ( t2 , classification ) .track ( t2 , t ) )
68
+ }
58
69
59
- override string getClassification ( ) {
60
- exists ( OldSensitiveData:: SensitiveData classification |
61
- oldSensitiveSource .isSourceOf ( classification )
62
- |
63
- classification = "sensitive.data." + result
70
+ /**
71
+ * Gets a reference to a function that is considered to be a sensitive source of
72
+ * `classification`.
73
+ */
74
+ DataFlow:: Node sensitiveFunction ( SensitiveDataClassification classification ) {
75
+ sensitiveFunction ( DataFlow:: TypeTracker:: end ( ) , classification ) .flowsTo ( result )
76
+ }
77
+
78
+ /**
79
+ * Gets a reference to a string constant that, if used as the key in a lookup,
80
+ * indicates the presence of sensitive data with `classification`.
81
+ */
82
+ private DataFlow:: LocalSourceNode sensitiveLookupStringConst (
83
+ DataFlow:: TypeTracker t , SensitiveDataClassification classification
84
+ ) {
85
+ t .start ( ) and
86
+ nameIndicatesSensitiveData ( result .asExpr ( ) .( StrConst ) .getText ( ) , classification )
87
+ or
88
+ exists ( DataFlow:: TypeTracker t2 |
89
+ result = sensitiveLookupStringConst ( t2 , classification ) .track ( t2 , t )
64
90
)
65
91
}
92
+
93
+ /**
94
+ * Gets a reference to a string constant that, if used as the key in a lookup,
95
+ * indicates the presence of sensitive data with `classification`.
96
+ *
97
+ * Also see `extraStepForCalls`.
98
+ */
99
+ DataFlow:: Node sensitiveLookupStringConst ( SensitiveDataClassification classification ) {
100
+ sensitiveLookupStringConst ( DataFlow:: TypeTracker:: end ( ) , classification ) .flowsTo ( result )
101
+ }
102
+
103
+ /** A function call that is considered a source of sensitive data. */
104
+ class SensitiveFunctionCall extends SensitiveDataSource:: Range , DataFlow:: CallCfgNode {
105
+ SensitiveDataClassification classification ;
106
+
107
+ SensitiveFunctionCall ( ) {
108
+ this .getFunction ( ) = sensitiveFunction ( classification )
109
+ or
110
+ // to cover functions that we don't have the definition for, and where the
111
+ // reference to the function has not already been marked as being sensitive
112
+ nameIndicatesSensitiveData ( this .getFunction ( ) .asCfgNode ( ) .( NameNode ) .getId ( ) , classification )
113
+ }
114
+
115
+ override SensitiveDataClassification getClassification ( ) { result = classification }
116
+ }
117
+
118
+ /**
119
+ * Tracks any modeled source of sensitive data (with any classification),
120
+ * to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
121
+ */
122
+ private DataFlow:: LocalSourceNode possibleSensitiveCallable ( DataFlow:: TypeTracker t ) {
123
+ t .start ( ) and
124
+ result instanceof SensitiveDataSource
125
+ or
126
+ exists ( DataFlow:: TypeTracker t2 | result = possibleSensitiveCallable ( t2 ) .track ( t2 , t ) )
127
+ }
128
+
129
+ /**
130
+ * Tracks any modeled source of sensitive data (with any classification),
131
+ * to limit the scope of `extraStepForCalls`. See its QLDoc for more context.
132
+ */
133
+ private DataFlow:: Node possibleSensitiveCallable ( ) {
134
+ possibleSensitiveCallable ( DataFlow:: TypeTracker:: end ( ) ) .flowsTo ( result )
135
+ }
136
+
137
+ /**
138
+ * Holds if the step from `nodeFrom` to `nodeTo` should be considered a
139
+ * taint-flow step for sensitive-data, to ensure calls are handled correctly.
140
+ *
141
+ * To handle calls properly, while preserving a good source for path explanations,
142
+ * you need to include this predicate as an additional taint step in your taint-tracking
143
+ * configurations.
144
+ *
145
+ * The core problem can be illustrated by the example below. If we consider the
146
+ * `print` a sink, what path and what source do we want to show? My initial approach
147
+ * would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
148
+ * lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
149
+ * like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
150
+ * with a non-optimal path, which starts at _bad source_, and therefore doesn't show
151
+ * how we figured out that `non_sensitive_name`
152
+ * could be a function that returns a password (and in cases where there are many calls to
153
+ * `my_func` it will be annoying for someone to figure this out manually).
154
+ *
155
+ * By including this additional taint-step in the taint-tracking configuration, it's possible
156
+ * to get a path explanation going from _good source_ to the sink.
157
+ *
158
+ * ```python
159
+ * def my_func(non_sensitive_name):
160
+ * x = non_sensitive_name() # <-- bad source
161
+ * print(x) # <-- sink
162
+ *
163
+ * import not_found
164
+ * f = not_found.get_passwd # <-- good source
165
+ * my_func(f)
166
+ * ```
167
+ */
168
+ predicate extraStepForCalls ( DataFlow:: Node nodeFrom , DataFlow:: CallCfgNode nodeTo ) {
169
+ // However, we do still use the type-tracking approach to limit the size of this
170
+ // predicate.
171
+ nodeTo .getFunction ( ) = nodeFrom and
172
+ nodeFrom = possibleSensitiveCallable ( )
173
+ }
174
+
175
+ /**
176
+ * Any kind of variable assignment (also including with/for) where the name indicates
177
+ * it contains sensitive data.
178
+ *
179
+ * Note: We _could_ make any access to a variable with a sensitive name a source of
180
+ * sensitive data, but to make path explanations in data-flow/taint-tracking good,
181
+ * we don't want that, since it works against allowing users to understand the flow
182
+ * in the program (which is the whole point).
183
+ *
184
+ * Note: To make data-flow/taint-tracking work, the expression that is _assigned_ to
185
+ * the variable is marked as the source (as compared to marking the variable as the
186
+ * source).
187
+ */
188
+ class SensitiveVariableAssignment extends SensitiveDataSource:: Range {
189
+ SensitiveDataClassification classification ;
190
+
191
+ SensitiveVariableAssignment ( ) {
192
+ exists ( DefinitionNode def |
193
+ nameIndicatesSensitiveData ( def .( NameNode ) .getId ( ) , classification ) and
194
+ (
195
+ this .asCfgNode ( ) = def .getValue ( )
196
+ or
197
+ this .asCfgNode ( ) = def .getValue ( ) .( ForNode ) .getSequence ( )
198
+ ) and
199
+ not this .asExpr ( ) instanceof FunctionExpr and
200
+ not this .asExpr ( ) instanceof ClassExpr
201
+ )
202
+ or
203
+ exists ( With with |
204
+ nameIndicatesSensitiveData ( with .getOptionalVars ( ) .( Name ) .getId ( ) , classification ) and
205
+ this .asExpr ( ) = with .getContextExpr ( )
206
+ )
207
+ }
208
+
209
+ override SensitiveDataClassification getClassification ( ) { result = classification }
210
+ }
211
+
212
+ /** An attribute access that is considered a source of sensitive data. */
213
+ class SensitiveAttributeAccess extends SensitiveDataSource:: Range {
214
+ SensitiveDataClassification classification ;
215
+
216
+ SensitiveAttributeAccess ( ) {
217
+ // Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
218
+ // I considered excluding any `from ... import something_sensitive`, but then realized that
219
+ // we should flag up `from ... import password as ...` as a password
220
+ nameIndicatesSensitiveData ( this .( DataFlow:: AttrRead ) .getAttributeName ( ) , classification )
221
+ or
222
+ // Things like `getattr(foo, <reference-to-string>)`
223
+ this .( DataFlow:: AttrRead ) .getAttributeNameExpr ( ) = sensitiveLookupStringConst ( classification )
224
+ }
225
+
226
+ override SensitiveDataClassification getClassification ( ) { result = classification }
227
+ }
228
+
229
+ /** A subscript, where the key indicates the result will be sensitive data. */
230
+ class SensitiveSubscript extends SensitiveDataSource:: Range {
231
+ SensitiveDataClassification classification ;
232
+
233
+ SensitiveSubscript ( ) {
234
+ this .asCfgNode ( ) .( SubscriptNode ) .getIndex ( ) =
235
+ sensitiveLookupStringConst ( classification ) .asCfgNode ( )
236
+ }
237
+
238
+ override SensitiveDataClassification getClassification ( ) { result = classification }
239
+ }
240
+
241
+ /** A call to `get` on an object, where the key indicates the result will be sensitive data. */
242
+ class SensitiveGetCall extends SensitiveDataSource:: Range , DataFlow:: CallCfgNode {
243
+ SensitiveDataClassification classification ;
244
+
245
+ SensitiveGetCall ( ) {
246
+ this .getFunction ( ) .asCfgNode ( ) .( AttrNode ) .getName ( ) = "get" and
247
+ this .getArg ( 0 ) = sensitiveLookupStringConst ( classification )
248
+ }
249
+
250
+ override SensitiveDataClassification getClassification ( ) { result = classification }
251
+ }
252
+
253
+ /** A parameter where the name indicates it will receive sensitive data. */
254
+ class SensitiveParameter extends SensitiveDataSource:: Range , DataFlow:: ParameterNode {
255
+ SensitiveDataClassification classification ;
256
+
257
+ SensitiveParameter ( ) {
258
+ nameIndicatesSensitiveData ( this .getParameter ( ) .getName ( ) , classification )
259
+ }
260
+
261
+ override SensitiveDataClassification getClassification ( ) { result = classification }
262
+ }
66
263
}
264
+
265
+ predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling:: extraStepForCalls / 2 ;
0 commit comments