@@ -93,6 +93,8 @@ private module SensitiveDataModeling {
93
93
/**
94
94
* Gets a reference to a string constant that, if used as the key in a lookup,
95
95
* indicates the presence of sensitive data with `classification`.
96
+ *
97
+ * Also see `extraStepForCalls`.
96
98
*/
97
99
DataFlow:: Node sensitiveLookupStringConst ( SensitiveDataClassification classification ) {
98
100
sensitiveLookupStringConst ( DataFlow:: TypeTracker:: end ( ) , classification ) .flowsTo ( result )
@@ -105,12 +107,49 @@ private module SensitiveDataModeling {
105
107
SensitiveFunctionCall ( ) {
106
108
this .getFunction ( ) = sensitiveFunction ( classification ) // callee is one of the nodes given by `sensitiveFunction` for this classification
107
109
or
110
+ // to cover functions that we don't have the definition for, and where the
111
+ // reference to the function has not already been marked as being sensitive
108
112
nameIndicatesSensitiveData ( this .getFunction ( ) .asCfgNode ( ) .( NameNode ) .getId ( ) , classification ) // only applies when the callee is a plain name (`NameNode`)
109
113
}
110
114
111
115
override SensitiveDataClassification getClassification ( ) { result = classification } // the classification bound by the characteristic predicate above
112
116
}
113
117
118
+ /**
119
+ * Holds if the step from `nodeFrom` to `nodeTo` should be considered a
120
+ * taint-flow step for sensitive-data, to ensure calls are handled correctly.
121
+ *
122
+ * To handle calls properly, while preserving a good source for path explanations,
123
+ * you need to include this predicate as an additional taint step in your taint-tracking
124
+ * configurations.
125
+ *
126
+ * The core problem can be illustrated by the example below. If we consider the
127
+ * `print` call a sink, what path and what source do we want to show? My initial approach
128
+ * would be to use type-tracking to propagate from the `not_found.get_passwd` attribute
129
+ * lookup, to the use of `non_sensitive_name`, and then create a new `SensitiveDataSource::Range`
130
+ * like `SensitiveFunctionCall`. Although that seems likely to work, it will also end up
131
+ * with a non-optimal path, which starts at _bad source_, and therefore doesn't show
132
+ * how we figured out that `non_sensitive_name`
133
+ * could be a function that returns a password (and in cases where there are many calls to
134
+ * `my_func` it will be annoying for someone to figure this out manually).
135
+ *
136
+ * By including this additional taint-step in the taint-tracking configuration, it's possible
137
+ * to get a path explanation going from _good source_ to the sink.
138
+ *
139
+ * ```python
140
+ * def my_func(non_sensitive_name):
141
+ * x = non_sensitive_name() # <-- bad source
142
+ * print(x) # <-- sink
143
+ *
144
+ * import not_found
145
+ * f = not_found.get_passwd # <-- good source
146
+ * my_func(f)
147
+ * ```
148
+ */
149
+ predicate extraStepForCalls ( DataFlow:: Node nodeFrom , DataFlow:: CallCfgNode nodeTo ) {
150
+ nodeTo .getFunction ( ) = nodeFrom // step goes from the callee reference (`nodeFrom`) to the call node itself (`nodeTo`)
151
+ }
152
+
114
153
/**
115
154
* Any kind of variable assignment (also including with/for) where the name indicates
116
155
* it contains sensitive data.
@@ -200,3 +239,5 @@ private module SensitiveDataModeling {
200
239
override SensitiveDataClassification getClassification ( ) { result = classification }
201
240
}
202
241
}
242
+
243
+ predicate sensitiveDataExtraStepForCalls = SensitiveDataModeling:: extraStepForCalls / 2 ; // alias exposing the two-argument `extraStepForCalls` from the private `SensitiveDataModeling` module
0 commit comments