@@ -60,7 +60,7 @@ private module SensitiveDataModeling {
60
60
) {
61
61
t .start ( ) and
62
62
exists ( Function f |
63
- nameIndicatesSensitiveData ( f .getName ( ) , classification ) and
63
+ f .getName ( ) = sensitiveString ( classification ) and
64
64
result .asExpr ( ) = f .getDefinition ( )
65
65
)
66
66
or
@@ -83,7 +83,7 @@ private module SensitiveDataModeling {
83
83
// Note: If this is implemented with type-tracking, we will get cross-talk as
84
84
// illustrated in python/ql/test/experimental/dataflow/sensitive-data/test.py
85
85
exists ( DataFlow:: LocalSourceNode source |
86
- nameIndicatesSensitiveData ( source .asExpr ( ) .( StrConst ) .getText ( ) , classification ) and
86
+ source .asExpr ( ) .( StrConst ) .getText ( ) = sensitiveString ( classification ) and
87
87
source .flowsTo ( result )
88
88
)
89
89
}
@@ -97,7 +97,7 @@ private module SensitiveDataModeling {
97
97
or
98
98
// to cover functions that we don't have the definition for, and where the
99
99
// reference to the function has not already been marked as being sensitive
100
- nameIndicatesSensitiveData ( this .getFunction ( ) .asCfgNode ( ) .( NameNode ) .getId ( ) , classification )
100
+ this .getFunction ( ) .asCfgNode ( ) .( NameNode ) .getId ( ) = sensitiveString ( classification )
101
101
}
102
102
103
103
override SensitiveDataClassification getClassification ( ) { result = classification }
@@ -164,6 +164,68 @@ private module SensitiveDataModeling {
164
164
nodeFrom = possibleSensitiveCallable ( )
165
165
}
166
166
167
/**
 * Gets the text of a string constant that is not a docstring, provided that
 * text does not match `notSensitiveRegexp()`.
 */
pragma[nomagic]
private string sensitiveStrConstCandidate() {
  exists(StrConst strConst |
    not strConst.isDocString() and
    result = strConst.getText()
  ) and
  not result.regexpMatch(notSensitiveRegexp())
}
172
+
173
/**
 * Gets the attribute name of some attribute read, provided that name does not
 * match `notSensitiveRegexp()`.
 */
pragma[nomagic]
private string sensitiveAttributeNameCandidate() {
  exists(DataFlow::AttrRead attrRead | result = attrRead.getAttributeName()) and
  not result.regexpMatch(notSensitiveRegexp())
}
178
+
179
/**
 * Gets the name of some parameter, provided that name does not match
 * `notSensitiveRegexp()`.
 */
pragma[nomagic]
private string sensitiveParameterNameCandidate() {
  exists(Parameter param | result = param.getName()) and
  not result.regexpMatch(notSensitiveRegexp())
}
184
+
185
/**
 * Gets the name of some function, provided that name does not match
 * `notSensitiveRegexp()`.
 */
pragma[nomagic]
private string sensitiveFunctionNameCandidate() {
  exists(Function func | result = func.getName()) and
  not result.regexpMatch(notSensitiveRegexp())
}
190
+
191
/**
 * Gets the identifier of some `Name` expression, provided that identifier does
 * not match `notSensitiveRegexp()`.
 */
pragma[nomagic]
private string sensitiveNameCandidate() {
  exists(Name name | result = name.getId()) and
  not result.regexpMatch(notSensitiveRegexp())
}
196
+
197
/**
 * Gets a string produced by any of the `sensitive*Candidate` predicates above
 * (names, attribute names, parameter names, function names, and string constants).
 *
 * The purpose of this predicate is deduplication: when the same string shows up as,
 * for instance, both an attribute name and a parameter name, it appears only once
 * here, so later regexp matching is performed once per distinct string rather than
 * once per occurrence kind.
 *
 * Deduplication normally only happens when a predicate is materialized, which is
 * why the union of the candidate predicates is given its own (non-inlined) predicate.
 */
pragma[nomagic]
private string sensitiveStringCandidate() {
  result = sensitiveNameCandidate()
  or
  result = sensitiveAttributeNameCandidate()
  or
  result = sensitiveParameterNameCandidate()
  or
  result = sensitiveFunctionNameCandidate()
  or
  result = sensitiveStrConstCandidate()
}
215
+
216
/**
 * Gets a string (primarily the name of some program entity) that may indicate
 * sensitive data of kind `classification`.
 *
 * The result is a string from `sensitiveStringCandidate()` that matches
 * `maybeSensitiveRegexp(classification)`. This is very similar to
 * `nameIndicatesSensitiveData`, but is arranged so that the number of regexp
 * matches that have to be performed is kept low.
 */
pragma[nomagic]
private string sensitiveString(SensitiveDataClassification classification) {
  exists(string candidate | candidate = sensitiveStringCandidate() |
    candidate.regexpMatch(maybeSensitiveRegexp(classification)) and
    result = candidate
  )
}
228
+
167
229
/**
168
230
* Any kind of variable assignment (also including with/for) where the name indicates
169
231
* it contains sensitive data.
@@ -182,7 +244,7 @@ private module SensitiveDataModeling {
182
244
183
245
SensitiveVariableAssignment ( ) {
184
246
exists ( DefinitionNode def |
185
- nameIndicatesSensitiveData ( def .( NameNode ) .getId ( ) , classification ) and
247
+ def .( NameNode ) .getId ( ) = sensitiveString ( classification ) and
186
248
(
187
249
this .asCfgNode ( ) = def .getValue ( )
188
250
or
@@ -193,7 +255,7 @@ private module SensitiveDataModeling {
193
255
)
194
256
or
195
257
exists ( With with |
196
- nameIndicatesSensitiveData ( with .getOptionalVars ( ) .( Name ) .getId ( ) , classification ) and
258
+ with .getOptionalVars ( ) .( Name ) .getId ( ) = sensitiveString ( classification ) and
197
259
this .asExpr ( ) = with .getContextExpr ( )
198
260
)
199
261
}
@@ -209,7 +271,7 @@ private module SensitiveDataModeling {
209
271
// Things like `foo.<sensitive-name>` or `from <module> import <sensitive-name>`
210
272
// I considered excluding any `from ... import something_sensitive`, but then realized that
211
273
// we should flag up `from ... import password as ...` as a password
212
- nameIndicatesSensitiveData ( this .( DataFlow:: AttrRead ) .getAttributeName ( ) , classification )
274
+ this .( DataFlow:: AttrRead ) .getAttributeName ( ) = sensitiveString ( classification )
213
275
or
214
276
// Things like `getattr(foo, <reference-to-string>)`
215
277
this .( DataFlow:: AttrRead ) .getAttributeNameExpr ( ) = sensitiveLookupStringConst ( classification )
@@ -246,9 +308,7 @@ private module SensitiveDataModeling {
246
308
class SensitiveParameter extends SensitiveDataSource::Range, DataFlow::ParameterNode {
  // The classification whose `sensitiveString` set contains this parameter's name.
  SensitiveDataClassification classification;

  SensitiveParameter() {
    // Holds when the name of the underlying parameter is one of the strings
    // produced by `sensitiveString` for `classification`.
    exists(string paramName |
      paramName = this.getParameter().getName() and
      paramName = sensitiveString(classification)
    )
  }

  override SensitiveDataClassification getClassification() { result = classification }
}
0 commit comments