@@ -13,87 +13,53 @@ private class ExploitableStringLiteral extends StringLiteral {
13
13
ExploitableStringLiteral ( ) { this .getValue ( ) .matches ( [ "%+%" , "%*%" , "%{%}%" ] ) }
14
14
}
15
15
16
- private class RegexCompileFlowConf extends DataFlow2:: Configuration {
17
- RegexCompileFlowConf ( ) { this = "RegexCompileFlowConfig" }
18
-
19
- override predicate isSource ( DataFlow:: Node node ) {
20
- node .asExpr ( ) instanceof ExploitableStringLiteral
21
- }
22
-
23
- override predicate isSink ( DataFlow:: Node node ) {
24
- sinkNode ( node , [ "regex-compile" , "regex-compile-match" , "regex-compile-find" ] )
25
- }
26
-
27
- override predicate isBarrier ( DataFlow:: Node node ) {
28
- node .getEnclosingCallable ( ) .getDeclaringType ( ) instanceof NonSecurityTestClass
29
- }
30
- }
31
-
32
- /**
33
- * Holds if `s` is used as a regex, with the mode `mode` (if known).
34
- * If regex mode is not known, `mode` will be `"None"`.
35
- *
36
- * As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
37
- * and therefore may be relevant for ReDoS queries are considered.
38
- */
39
- predicate usedAsRegex ( StringLiteral s , string mode , boolean match_full_string ) {
40
- exists ( DataFlow:: Node sink |
41
- any ( RegexCompileFlowConf c ) .hasFlow ( DataFlow2:: exprNode ( s ) , sink ) and
42
- mode = "None" and // TODO: proper mode detection
43
- ( if matchesFullString ( sink ) then match_full_string = true else match_full_string = false )
44
- )
45
- }
46
-
47
16
/**
48
- * Holds if the regex that flows to `sink` is used to match against a full string,
49
- * as though it was implicitly surrounded by ^ and $.
17
+ * Holds if `kind` is an external sink kind that is relevant for regex flow.
18
+ * `full` is true if sinks with this kind match against the full string of their input.
19
+ * `strArg` is the index of the argument to methods with this sink kind that contains the string to be matched against,
20
+ * where -1 is the qualifier; or -2 if no such argument exists.
50
21
*/
51
- private predicate matchesFullString ( DataFlow:: Node sink ) {
52
- sinkNode ( sink , "regex-compile-match" )
53
- or
54
- exists ( DataFlow:: Node matchSource , RegexCompileToMatchConf conf |
55
- matchSource .asExpr ( ) .( MethodAccess ) .getAnArgument ( ) = sink .asExpr ( ) and
56
- conf .hasFlow ( matchSource , _)
22
+ private predicate regexSinkKindInfo ( string kind , boolean full , int strArg ) {
23
+ sinkModel ( _, _, _, _, _, _, _, kind ) and
24
+ exists ( string fullStr , string strArgStr |
25
+ (
26
+ full = true and fullStr = "f"
27
+ or
28
+ full = false and fullStr = ""
29
+ ) and
30
+ (
31
+ strArgStr .toInt ( ) = strArg
32
+ or
33
+ strArg = - 2 and
34
+ strArgStr = ""
35
+ )
36
+ |
37
+ kind = "regex-use[" + fullStr + strArgStr + "]"
57
38
)
58
39
}
59
40
60
- private class RegexCompileToMatchConf extends DataFlow2 :: Configuration {
61
- RegexCompileToMatchConf ( ) { this = "RegexCompileToMatchConfig" }
62
-
63
- override predicate isSource ( DataFlow :: Node node ) { sourceNode ( node , "regex-compile" ) }
41
+ /** A sink that is relevant for regex flow. */
42
+ private class RegexFlowSink extends DataFlow :: Node {
43
+ boolean full ;
44
+ int strArg ;
64
45
65
- override predicate isSink ( DataFlow:: Node node ) { sinkNode ( node , "regex-match" ) }
66
-
67
- override predicate isAdditionalFlowStep ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
68
- exists ( MethodAccess ma | node2 .asExpr ( ) = ma and node1 .asExpr ( ) = ma .getQualifier ( ) |
69
- ma .getMethod ( ) .hasQualifiedName ( "java.util.regex" , "Pattern" , "matcher" )
46
+ RegexFlowSink ( ) {
47
+ exists ( string kind |
48
+ regexSinkKindInfo ( kind , full , strArg ) and
49
+ sinkNode ( this , kind )
70
50
)
71
51
}
72
- }
73
-
74
- /**
75
- * A method access that can match a regex against a string
76
- */
77
- abstract class RegexMatchMethodAccess extends MethodAccess {
78
- string package ;
79
- string type ;
80
- string name ;
81
- int regexArg ;
82
- int stringArg ;
83
- Method m ;
84
-
85
- RegexMatchMethodAccess ( ) {
86
- this .getMethod ( ) .getSourceDeclaration ( ) .overrides * ( m ) and
87
- m .hasQualifiedName ( package , type , name ) and
88
- regexArg in [ - 1 .. m .getNumberOfParameters ( ) - 1 ] and
89
- stringArg in [ - 1 .. m .getNumberOfParameters ( ) - 1 ]
90
- }
91
52
92
- /** Gets the argument of this call that the regex to be matched against flows into . */
93
- Expr getRegexArg ( ) { result = argOf ( this , regexArg ) }
53
+ /** Holds if a regex that flows here is matched against a full string (rather than a substring) . */
54
+ predicate matchesFullString ( ) { full = true }
94
55
95
- /** Gets the argument of this call that the string being matched flows into. */
96
- Expr getStringArg ( ) { result = argOf ( this , stringArg ) }
56
+ /** Gets the string expression that a regex that flows here is matched against, if any. */
57
+ Expr getStringArgument ( ) {
58
+ exists ( MethodAccess ma |
59
+ this .asExpr ( ) = argOf ( ma , _) and
60
+ result = argOf ( ma , strArg )
61
+ )
62
+ }
97
63
}
98
64
99
65
private Expr argOf ( MethodAccess ma , int arg ) {
@@ -115,35 +81,7 @@ class RegexAdditionalFlowStep extends Unit {
115
81
abstract predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) ;
116
82
}
117
83
118
- // TODO: can this be done with the models-as-data framework?
119
- private class JdkRegexMatchMethodAccess extends RegexMatchMethodAccess {
120
- JdkRegexMatchMethodAccess ( ) {
121
- package = "java.util.regex" and
122
- type = "Pattern" and
123
- (
124
- name = "matcher" and regexArg = - 1 and stringArg = 0
125
- or
126
- name = "matches" and regexArg = 0 and stringArg = 1
127
- or
128
- name = "split" and regexArg = - 1 and stringArg = 0
129
- or
130
- name = "splitAsStream" and regexArg = - 1 and stringArg = 0
131
- )
132
- or
133
- package = "java.lang" and
134
- type = "String" and
135
- name = [ "matches" , "split" , "replaceAll" , "replaceFirst" ] and
136
- regexArg = 0 and
137
- stringArg = - 1
138
- or
139
- package = "java.util.function" and
140
- type = "Predicate" and
141
- name = "test" and
142
- regexArg = - 1 and
143
- stringArg = 0
144
- }
145
- }
146
-
84
+ // TODO: This may be able to be done with models-as-data if query-specific flow steps become supported.
147
85
private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
148
86
override predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
149
87
exists ( MethodAccess ma , Method m , string package , string type , string name , int arg |
@@ -155,7 +93,7 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
155
93
package = "java.util.regex" and
156
94
type = "Pattern" and
157
95
(
158
- name = [ "asMatchPredicate" , "asPredicate" ] and
96
+ name = [ "asMatchPredicate" , "asPredicate" , "matcher" ] and
159
97
arg = - 1
160
98
or
161
99
name = "compile" and
@@ -170,16 +108,6 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
170
108
}
171
109
}
172
110
173
- private class GuavaRegexMatchMethodAccess extends RegexMatchMethodAccess {
174
- GuavaRegexMatchMethodAccess ( ) {
175
- package = "com.google.common.base" and
176
- regexArg = - 1 and
177
- stringArg = 0 and
178
- type = [ "Splitter" , "Splitter$MapSplitter" ] and
179
- name = [ "split" , "splitToList" ]
180
- }
181
- }
182
-
183
111
private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
184
112
override predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
185
113
exists ( MethodAccess ma , Method m , string package , string type , string name , int arg |
@@ -209,20 +137,46 @@ private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
209
137
}
210
138
}
211
139
212
- private class RegexMatchFlowConf extends DataFlow2:: Configuration {
213
- RegexMatchFlowConf ( ) { this = "RegexMatchFlowConf " }
140
+ private class RegexFlowConf extends DataFlow2:: Configuration {
141
+ RegexFlowConf ( ) { this = "RegexFlowConfig " }
214
142
215
- override predicate isSource ( DataFlow:: Node src ) {
216
- src .asExpr ( ) instanceof ExploitableStringLiteral
143
+ override predicate isSource ( DataFlow:: Node node ) {
144
+ node .asExpr ( ) instanceof ExploitableStringLiteral
217
145
}
218
146
219
- override predicate isSink ( DataFlow:: Node sink ) {
220
- exists ( RegexMatchMethodAccess ma | sink .asExpr ( ) = ma .getRegexArg ( ) )
221
- }
147
+ override predicate isSink ( DataFlow:: Node node ) { node instanceof RegexFlowSink }
222
148
223
149
override predicate isAdditionalFlowStep ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
224
150
any ( RegexAdditionalFlowStep s ) .step ( node1 , node2 )
225
151
}
152
+
153
+ override predicate isBarrier ( DataFlow:: Node node ) {
154
+ node .getEnclosingCallable ( ) .getDeclaringType ( ) instanceof NonSecurityTestClass
155
+ }
156
+ }
157
+
158
+ /**
159
+ * Holds if `regex` is used as a regex, with the mode `mode` (if known).
160
+ * If regex mode is not known, `mode` will be `"None"`.
161
+ *
162
+ * As an optimisation, only regexes containing an infinite repetition quantifier (`+`, `*`, or `{x,}`)
163
+ * and therefore may be relevant for ReDoS queries are considered.
164
+ */
165
+ predicate usedAsRegex ( StringLiteral regex , string mode , boolean match_full_string ) {
166
+ any ( RegexFlowConf c ) .hasFlow ( DataFlow2:: exprNode ( regex ) , _) and
167
+ mode = "None" and // TODO: proper mode detection
168
+ ( if matchesFullString ( regex ) then match_full_string = true else match_full_string = false )
169
+ }
170
+
171
+ /**
172
+ * Holds if `regex` is used as a regular expression that is matched against a full string,
173
+ * as though it was implicitly surrounded by ^ and $.
174
+ */
175
+ private predicate matchesFullString ( StringLiteral regex ) {
176
+ exists ( RegexFlowConf c , RegexFlowSink sink |
177
+ sink .matchesFullString ( ) and
178
+ c .hasFlow ( DataFlow2:: exprNode ( regex ) , sink )
179
+ )
226
180
}
227
181
228
182
/**
@@ -232,12 +186,8 @@ private class RegexMatchFlowConf extends DataFlow2::Configuration {
232
186
* and therefore may be relevant for ReDoS queries are considered.
233
187
*/
234
188
predicate regexMatchedAgainst ( StringLiteral regex , Expr str ) {
235
- exists (
236
- DataFlow:: Node src , DataFlow:: Node sink , RegexMatchMethodAccess ma , RegexMatchFlowConf conf
237
- |
238
- src .asExpr ( ) = regex and
239
- sink .asExpr ( ) = ma .getRegexArg ( ) and
240
- conf .hasFlow ( src , sink ) and
241
- str = ma .getStringArg ( )
189
+ exists ( RegexFlowConf c , RegexFlowSink sink |
190
+ str = sink .getStringArgument ( ) and
191
+ c .hasFlow ( DataFlow2:: exprNode ( regex ) , sink )
242
192
)
243
193
}
0 commit comments