@@ -21,6 +21,7 @@ private import codeql.ruby.typetracking.TypeTracker
21
21
private import codeql.ruby.ApiGraphs
22
22
private import codeql.ruby.Concepts
23
23
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
24
+ private import codeql.ruby.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
24
25
private import codeql.ruby.TaintTracking
25
26
private import codeql.ruby.frameworks.core.String
26
27
@@ -37,43 +38,6 @@ DataFlow::LocalSourceNode strStart() {
37
38
/** Gets a dataflow node for a regular expression literal. */
38
39
DataFlow:: LocalSourceNode regStart ( ) { result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral }
39
40
40
- /**
41
- * Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
42
- * `nodeFrom` and `nodeTo` has type `fromType` and `toType` respectively.
43
- * The types are either "string" or "regexp".
44
- */
45
- predicate step (
46
- DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo , string fromType , string toType
47
- ) {
48
- fromType = toType and
49
- fromType = "string" and
50
- (
51
- // include taint flow through `String` summaries
52
- TaintTracking:: localTaintStep ( nodeFrom , nodeTo ) and
53
- nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
54
- String:: SummarizedCallable
55
- or
56
- // string concatenations, and
57
- exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
58
- op = nodeTo .asExpr ( ) and
59
- op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
60
- op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
61
- )
62
- or
63
- // string interpolations
64
- nodeFrom .asExpr ( ) =
65
- nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
66
- )
67
- or
68
- fromType = "string" and
69
- toType = "reg" and
70
- exists ( DataFlow:: CallNode call |
71
- call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
72
- nodeFrom = call .getArgument ( 0 ) and
73
- nodeTo = call
74
- )
75
- }
76
-
77
41
/** Gets a node where string values that flow to the node are interpreted as regular expressions. */
78
42
DataFlow:: Node stringSink ( ) {
79
43
result instanceof RE:: RegExpInterpretation:: Range and
@@ -91,70 +55,174 @@ DataFlow::Node stringSink() {
91
55
/** Gets a node where regular expressions that flow to the node are used. */
92
56
DataFlow:: Node regSink ( ) { result = any ( RegexExecution exec ) .getRegex ( ) }
93
57
94
- /** Gets a node that is reachable by type-tracking from any string or regular expression. */
95
- DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
96
- t .start ( ) and
97
- result = [ strStart ( ) , regStart ( ) ]
98
- or
99
- exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
100
- or
101
- exists ( TypeTracker t2 | t2 = t .continue ( ) | step ( forward ( t2 ) .getALocalUse ( ) , result , _, _) )
58
+ private signature module TypeTrackInputSig {
59
+ DataFlow:: LocalSourceNode start ( TypeTracker t , DataFlow:: Node start ) ;
60
+
61
+ predicate end ( DataFlow:: Node n ) ;
62
+
63
+ predicate additionalStep ( DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo ) ;
102
64
}
103
65
104
66
/**
105
- * Gets a node that is backwards reachable from any regular expression use ,
106
- * where that use is reachable by type- tracking from any string or regular expression .
67
+ * Provides a version of type tracking where we first prune for reachable nodes ,
68
+ * before doing the type tracking computation .
107
69
*/
108
- DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
109
- t .start ( ) and
110
- result .flowsTo ( [ stringSink ( ) , regSink ( ) ] ) and
111
- result = forward ( TypeTracker:: end ( ) )
112
- or
113
- exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
114
- or
115
- exists ( TypeBackTracker t2 | t2 = t .continue ( ) | step ( result .getALocalUse ( ) , backwards ( t2 ) , _, _) )
70
+ private module TypeTrack< TypeTrackInputSig Input> {
71
+ private predicate additionalStep (
72
+ DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo
73
+ ) {
74
+ Input:: additionalStep ( nodeFrom .getALocalUse ( ) , nodeTo )
75
+ }
76
+
77
+ /** Gets a node that is forwards reachable by type-tracking. */
78
+ pragma [ nomagic]
79
+ private DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
80
+ result = Input:: start ( t , _)
81
+ or
82
+ exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
83
+ or
84
+ exists ( TypeTracker t2 | t2 = t .continue ( ) | additionalStep ( forward ( t2 ) , result ) )
85
+ }
86
+
87
+ bindingset [ result , tbt]
88
+ pragma [ inline_late]
89
+ pragma [ noopt]
90
+ private DataFlow:: LocalSourceNode forwardLateInline ( TypeBackTracker tbt ) {
91
+ exists ( TypeTracker tt |
92
+ result = forward ( tt ) and
93
+ tt = tbt .getACompatibleTypeTracker ( )
94
+ )
95
+ }
96
+
97
+ /** Gets a node that is backwards reachable by type-tracking. */
98
+ pragma [ nomagic]
99
+ private DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
100
+ result = forwardLateInline ( t ) and
101
+ (
102
+ t .start ( ) and
103
+ Input:: end ( result .getALocalUse ( ) )
104
+ or
105
+ exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
106
+ or
107
+ exists ( TypeBackTracker t2 | t2 = t .continue ( ) | additionalStep ( result , backwards ( t2 ) ) )
108
+ )
109
+ }
110
+
111
+ bindingset [ result , tt]
112
+ pragma [ inline_late]
113
+ pragma [ noopt]
114
+ private DataFlow:: LocalSourceNode backwardsInlineLate ( TypeTracker tt ) {
115
+ exists ( TypeBackTracker tbt |
116
+ result = backwards ( tbt ) and
117
+ tt = tbt .getACompatibleTypeTracker ( )
118
+ )
119
+ }
120
+
121
+ /** Holds if `n` is forwards and backwards reachable with type tracker `t`. */
122
+ pragma [ nomagic]
123
+ private predicate reached ( DataFlow:: LocalSourceNode n , TypeTracker t ) {
124
+ n = forward ( t ) and
125
+ n = backwardsInlineLate ( t )
126
+ }
127
+
128
+ pragma [ nomagic]
129
+ private TypeTracker stepReached (
130
+ TypeTracker t , DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo
131
+ ) {
132
+ exists ( StepSummary summary |
133
+ StepSummary:: step ( nodeFrom , nodeTo , summary ) and
134
+ reached ( nodeFrom , t ) and
135
+ reached ( nodeTo , result ) and
136
+ result = t .append ( summary )
137
+ )
138
+ or
139
+ additionalStep ( nodeFrom , nodeTo ) and
140
+ reached ( nodeFrom , pragma [ only_bind_into ] ( t ) ) and
141
+ reached ( nodeTo , pragma [ only_bind_into ] ( t ) ) and
142
+ result = t .continue ( )
143
+ }
144
+
145
+ /** Gets a node that has been tracked from the start node `start`. */
146
+ DataFlow:: LocalSourceNode track ( DataFlow:: Node start , TypeTracker t ) {
147
+ t .start ( ) and
148
+ result = Input:: start ( t , start ) and
149
+ reached ( result , t )
150
+ or
151
+ exists ( TypeTracker t2 | t = stepReached ( t2 , track ( start , t2 ) , result ) )
152
+ }
153
+ }
154
+
155
+ /** Holds if `inputStr` is compiled to a regular expression that is returned at `call`. */
156
+ pragma [ nomagic]
157
+ private predicate regFromString ( DataFlow:: LocalSourceNode inputStr , DataFlow:: CallNode call ) {
158
+ exists ( DataFlow:: Node mid |
159
+ inputStr .flowsTo ( mid ) and
160
+ call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
161
+ mid = call .getArgument ( 0 )
162
+ )
163
+ }
164
+
165
+ private module StringTypeTrackInput implements TypeTrackInputSig {
166
+ DataFlow:: LocalSourceNode start ( TypeTracker t , DataFlow:: Node start ) {
167
+ start = strStart ( ) and t .start ( ) and result = start
168
+ }
169
+
170
+ predicate end ( DataFlow:: Node n ) {
171
+ n = stringSink ( ) or
172
+ regFromString ( n , _)
173
+ }
174
+
175
+ predicate additionalStep ( DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo ) {
176
+ // include taint flow through `String` summaries
177
+ TaintTrackingPrivate:: summaryThroughStepTaint ( nodeFrom , nodeTo ,
178
+ any ( String:: SummarizedCallable c ) )
179
+ or
180
+ // string concatenations, and
181
+ exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
182
+ op = nodeTo .asExpr ( ) and
183
+ op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
184
+ op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
185
+ )
186
+ or
187
+ // string interpolations
188
+ nodeFrom .asExpr ( ) =
189
+ nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
190
+ }
116
191
}
117
192
118
193
/**
119
194
* Gets a node that has been tracked from the string constant `start` to some node.
120
195
* This is used to figure out where `start` is evaluated as a regular expression against an input string,
121
196
* or where `start` is compiled into a regular expression.
122
197
*/
123
- private DataFlow:: LocalSourceNode trackStrings ( DataFlow:: Node start , TypeTracker t ) {
124
- result = backwards ( _) and
125
- (
198
+ private predicate trackStrings = TypeTrack< StringTypeTrackInput > :: track / 2 ;
199
+
200
+ /** Holds if `strConst` flows to a regex compilation (tracked by `t`), where the resulting regular expression is stored in `reg`. */
201
+ pragma [ nomagic]
202
+ private predicate regFromStringStart ( DataFlow:: Node strConst , TypeTracker t , DataFlow:: CallNode reg ) {
203
+ regFromString ( trackStrings ( strConst , t ) , reg ) and
204
+ exists ( t .continue ( ) )
205
+ }
206
+
207
+ private module RegTypeTrackInput implements TypeTrackInputSig {
208
+ DataFlow:: LocalSourceNode start ( TypeTracker t , DataFlow:: Node start ) {
209
+ start = regStart ( ) and
126
210
t .start ( ) and
127
- start = result and
128
- result = strStart ( )
211
+ result = start
129
212
or
130
- exists ( TypeTracker t2 | result = trackStrings ( start , t2 ) .track ( t2 , t ) )
131
- or
132
- // an additional step from string to string
133
- exists ( TypeTracker t2 | t2 = t .continue ( ) |
134
- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "string" )
135
- )
136
- )
213
+ regFromStringStart ( start , t , result )
214
+ }
215
+
216
+ predicate end ( DataFlow:: Node n ) { n = regSink ( ) }
217
+
218
+ predicate additionalStep ( DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo ) { none ( ) }
137
219
}
138
220
139
221
/**
140
222
* Gets a node that has been tracked from the regular expression `start` to some node.
141
223
* This is used to figure out where `start` is executed against an input string.
142
224
*/
143
- private DataFlow:: LocalSourceNode trackRegs ( DataFlow:: Node start , TypeTracker t ) {
144
- result = backwards ( _) and
145
- (
146
- t .start ( ) and
147
- start = result and
148
- result = regStart ( )
149
- or
150
- exists ( TypeTracker t2 | result = trackRegs ( start , t2 ) .track ( t2 , t ) )
151
- or
152
- // an additional step where a string is converted to a regular expression
153
- exists ( TypeTracker t2 | t2 = t .continue ( ) |
154
- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "reg" )
155
- )
156
- )
157
- }
225
+ private predicate trackRegs = TypeTrack< RegTypeTrackInput > :: track / 2 ;
158
226
159
227
/** Gets a node that references a regular expression. */
160
228
private DataFlow:: LocalSourceNode trackRegexpType ( TypeTracker t ) {
0 commit comments