@@ -21,6 +21,7 @@ private import codeql.ruby.typetracking.TypeTracker
21
21
private import codeql.ruby.ApiGraphs
22
22
private import codeql.ruby.Concepts
23
23
private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
24
+ private import codeql.ruby.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
24
25
private import codeql.ruby.TaintTracking
25
26
private import codeql.ruby.frameworks.core.String
26
27
@@ -37,43 +38,6 @@ DataFlow::LocalSourceNode strStart() {
37
38
/** Gets a dataflow node for a regular expression literal. */
38
39
DataFlow:: LocalSourceNode regStart ( ) { result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral }
39
40
40
- /**
41
- * Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
42
- * `nodeFrom` and `nodeTo` has type `fromType` and `toType` respectively.
43
- * The types are either "string" or "regexp".
44
- */
45
- predicate step (
46
- DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo , string fromType , string toType
47
- ) {
48
- fromType = toType and
49
- fromType = "string" and
50
- (
51
- // include taint flow through `String` summaries
52
- TaintTracking:: localTaintStep ( nodeFrom , nodeTo ) and
53
- nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
54
- String:: SummarizedCallable
55
- or
56
- // string concatenations, and
57
- exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
58
- op = nodeTo .asExpr ( ) and
59
- op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
60
- op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
61
- )
62
- or
63
- // string interpolations
64
- nodeFrom .asExpr ( ) =
65
- nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
66
- )
67
- or
68
- fromType = "string" and
69
- toType = "reg" and
70
- exists ( DataFlow:: CallNode call |
71
- call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
72
- nodeFrom = call .getArgument ( 0 ) and
73
- nodeTo = call
74
- )
75
- }
76
-
77
41
/** Gets a node where string values that flow to the node are interpreted as regular expressions. */
78
42
DataFlow:: Node stringSink ( ) {
79
43
result instanceof RE:: RegExpInterpretation:: Range and
@@ -91,69 +55,172 @@ DataFlow::Node stringSink() {
91
55
/** Gets a node where regular expressions that flow to the node are used. */
92
56
DataFlow:: Node regSink ( ) { result = any ( RegexExecution exec ) .getRegex ( ) }
93
57
94
- /** Gets a node that is reachable by type-tracking from any string or regular expression. */
95
- DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
96
- t .start ( ) and
97
- result = [ strStart ( ) , regStart ( ) ]
98
- or
99
- exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
100
- or
101
- exists ( TypeTracker t2 | t2 = t .continue ( ) | step ( forward ( t2 ) .getALocalUse ( ) , result , _, _) )
58
+ private signature module ReachInputSig {
59
+ DataFlow:: LocalSourceNode start ( TypeTracker t ) ;
60
+
61
+ DataFlow:: Node end ( ) ;
62
+
63
+ predicate additionalStep ( DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo ) ;
102
64
}
103
65
104
- /**
105
- * Gets a node that is backwards reachable from any regular expression use,
106
- * where that use is reachable by type-tracking from any string or regular expression.
107
- */
108
- DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
109
- t .start ( ) and
110
- result .flowsTo ( [ stringSink ( ) , regSink ( ) ] ) and
111
- result = forward ( TypeTracker:: end ( ) )
112
- or
113
- exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
114
- or
115
- exists ( TypeBackTracker t2 | t2 = t .continue ( ) | step ( result .getALocalUse ( ) , backwards ( t2 ) , _, _) )
66
+ private module Reach< ReachInputSig Input> {
67
+ /** Gets a node that is forwards reachable by type-tracking. */
68
+ pragma [ nomagic]
69
+ private DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
70
+ result = Input:: start ( t )
71
+ or
72
+ exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
73
+ or
74
+ exists ( TypeTracker t2 | t2 = t .continue ( ) | Input:: additionalStep ( forward ( t2 ) , result ) )
75
+ }
76
+
77
+ bindingset [ result , tbt]
78
+ pragma [ inline_late]
79
+ pragma [ noopt]
80
+ private DataFlow:: LocalSourceNode forwardLateInline ( TypeBackTracker tbt ) {
81
+ exists ( TypeTracker tt |
82
+ result = forward ( tt ) and
83
+ tt = tbt .getACompatibleTypeTracker ( )
84
+ )
85
+ }
86
+
87
+ /** Gets a node that is backwards reachable by type-tracking. */
88
+ pragma [ nomagic]
89
+ private DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
90
+ result = forwardLateInline ( t ) and
91
+ (
92
+ t .start ( ) and
93
+ result .flowsTo ( Input:: end ( ) )
94
+ or
95
+ exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
96
+ or
97
+ exists ( TypeBackTracker t2 | t2 = t .continue ( ) | Input:: additionalStep ( result , backwards ( t2 ) ) )
98
+ )
99
+ }
100
+
101
+ bindingset [ result , tt]
102
+ pragma [ inline_late]
103
+ pragma [ noopt]
104
+ private DataFlow:: LocalSourceNode backwardsInlineLate ( TypeTracker tt ) {
105
+ exists ( TypeBackTracker tbt |
106
+ result = backwards ( tbt ) and
107
+ tt = tbt .getACompatibleTypeTracker ( )
108
+ )
109
+ }
110
+
111
+ pragma [ nomagic]
112
+ predicate reached ( DataFlow:: LocalSourceNode n , TypeTracker t ) {
113
+ n = forward ( t ) and
114
+ n = backwardsInlineLate ( t )
115
+ }
116
+
117
+ pragma [ nomagic]
118
+ TypeTracker stepReached (
119
+ TypeTracker t , DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo
120
+ ) {
121
+ exists ( StepSummary summary |
122
+ StepSummary:: step ( nodeFrom , nodeTo , summary ) and
123
+ reached ( nodeFrom , t ) and
124
+ reached ( nodeTo , result ) and
125
+ result = t .append ( summary )
126
+ )
127
+ or
128
+ Input:: additionalStep ( nodeFrom , nodeTo ) and
129
+ reached ( nodeFrom , pragma [ only_bind_into ] ( t ) ) and
130
+ reached ( nodeTo , pragma [ only_bind_into ] ( t ) ) and
131
+ result = t .continue ( )
132
+ }
133
+ }
134
+
135
+ pragma [ nomagic]
136
+ private predicate regFromString ( DataFlow:: LocalSourceNode n , DataFlow:: CallNode call ) {
137
+ exists ( DataFlow:: Node mid |
138
+ n .flowsTo ( mid ) and
139
+ call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
140
+ mid = call .getArgument ( 0 )
141
+ )
142
+ }
143
+
144
+ private module StringReachInput implements ReachInputSig {
145
+ DataFlow:: LocalSourceNode start ( TypeTracker t ) { result = strStart ( ) and t .start ( ) }
146
+
147
+ DataFlow:: Node end ( ) {
148
+ result = stringSink ( ) or
149
+ regFromString ( result , _)
150
+ }
151
+
152
+ predicate additionalStep ( DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo ) {
153
+ exists ( DataFlow:: Node mid | nodeFrom .flowsTo ( mid ) |
154
+ // include taint flow through `String` summaries
155
+ TaintTrackingPrivate:: summaryThroughStepTaint ( mid , nodeTo , any ( String:: SummarizedCallable c ) )
156
+ or
157
+ // string concatenations, and
158
+ exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
159
+ op = nodeTo .asExpr ( ) and
160
+ op .getAnOperand ( ) = mid .asExpr ( ) and
161
+ op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
162
+ )
163
+ or
164
+ // string interpolations
165
+ mid .asExpr ( ) = nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
166
+ )
167
+ }
116
168
}
117
169
170
+ private module StringReach = Reach< StringReachInput > ;
171
+
118
172
/**
119
173
* Gets a node that has been tracked from the string constant `start` to some node.
120
174
* This is used to figure out where `start` is evaluated as a regular expression against an input string,
121
175
* or where `start` is compiled into a regular expression.
122
176
*/
123
177
private DataFlow:: LocalSourceNode trackStrings ( DataFlow:: Node start , TypeTracker t ) {
124
- result = backwards ( _) and
125
- (
126
- t .start ( ) and
127
- start = result and
128
- result = strStart ( )
129
- or
130
- exists ( TypeTracker t2 | result = trackStrings ( start , t2 ) .track ( t2 , t ) )
178
+ t .start ( ) and
179
+ start = result and
180
+ result = strStart ( ) and
181
+ StringReach:: reached ( result , t )
182
+ or
183
+ exists ( TypeTracker t2 | t = StringReach:: stepReached ( t2 , trackStrings ( start , t2 ) , result ) )
184
+ }
185
+
186
+ pragma [ nomagic]
187
+ private predicate regFromStringStart ( DataFlow:: Node start , TypeTracker t , DataFlow:: CallNode nodeTo ) {
188
+ regFromString ( trackStrings ( start , t ) , nodeTo ) and
189
+ exists ( t .continue ( ) )
190
+ }
191
+
192
+ private module RegReachInput implements ReachInputSig {
193
+ DataFlow:: LocalSourceNode start ( TypeTracker t ) {
194
+ result = regStart ( ) and
195
+ t .start ( )
131
196
or
132
- // an additional step from string to string
133
- exists ( TypeTracker t2 | t2 = t .continue ( ) |
134
- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "string" )
135
- )
136
- )
197
+ regFromStringStart ( _, t , result )
198
+ }
199
+
200
+ DataFlow:: Node end ( ) { result = regSink ( ) }
201
+
202
+ predicate additionalStep ( DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo ) {
203
+ none ( )
204
+ }
137
205
}
138
206
207
+ private module RegReach = Reach< RegReachInput > ;
208
+
139
209
/**
140
210
* Gets a node that has been tracked from the regular expression `start` to some node.
141
211
* This is used to figure out where `start` is executed against an input string.
142
212
*/
143
213
private DataFlow:: LocalSourceNode trackRegs ( DataFlow:: Node start , TypeTracker t ) {
144
- result = backwards ( _ ) and
214
+ RegReach :: reached ( result , t ) and
145
215
(
146
216
t .start ( ) and
147
217
start = result and
148
218
result = regStart ( )
149
219
or
150
- exists ( TypeTracker t2 | result = trackRegs ( start , t2 ) .track ( t2 , t ) )
151
- or
152
- // an additional step where a string is converted to a regular expression
153
- exists ( TypeTracker t2 | t2 = t .continue ( ) |
154
- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "reg" )
155
- )
220
+ regFromStringStart ( start , t , result )
156
221
)
222
+ or
223
+ exists ( TypeTracker t2 | t = RegReach:: stepReached ( t2 , trackRegs ( start , t2 ) , result ) )
157
224
}
158
225
159
226
/** Gets a node that references a regular expression. */
0 commit comments