@@ -36,28 +36,191 @@ abstract class RegexCreation extends DataFlow::Node {
36
36
* created from.
37
37
*/
38
38
abstract DataFlow:: Node getStringInput ( ) ;
39
+
40
+ /**
41
+ * Gets a dataflow node for the options input that might contain parse mode
42
+ * flags (if any).
+ *
+ * This default implementation has no result (`none()`); a subclass whose
+ * creation call takes an options argument is expected to override it.
43
+ */
44
+ DataFlow:: Node getOptionsInput ( ) { none ( ) }
39
45
}
40
46
41
47
/**
42
- * A data-flow node where a `Regex` or `NSRegularExpression` object is created.
48
+ * A data-flow node where a `Regex` object is created.
+ *
+ * The node is a call whose static target is `Regex.init(_:)` or
+ * `Regex.init(_:as:)`. The first argument of that call is reported as the
+ * string input; `getOptionsInput` is not overridden here.
43
49
*/
44
- private class StandardRegexCreation extends RegexCreation {
50
+ private class RegexRegexCreation extends RegexCreation {
45
51
DataFlow:: Node input ;
46
52
47
- StandardRegexCreation ( ) {
53
+ RegexRegexCreation ( ) {
48
54
exists ( CallExpr call |
49
- (
50
- call .getStaticTarget ( ) .( Method ) .hasQualifiedName ( "Regex" , [ "init(_:)" , "init(_:as:)" ] ) or
51
- call .getStaticTarget ( )
52
- .( Method )
53
- .hasQualifiedName ( "NSRegularExpression" , "init(pattern:options:)" )
54
- ) and
55
+ call .getStaticTarget ( ) .( Method ) .hasQualifiedName ( "Regex" , [ "init(_:)" , "init(_:as:)" ] ) and
56
+ input .asExpr ( ) = call .getArgument ( 0 ) .getExpr ( ) and // argument 0 is the string input
57
+ this .asExpr ( ) = call // the creation node is the initializer call itself
58
+ )
59
+ }
60
+
61
+ override DataFlow:: Node getStringInput ( ) { result = input }
62
+ }
63
+
64
+ /**
65
+ * A data-flow node where an `NSRegularExpression` object is created.
+ *
+ * The node is a call whose static target is
+ * `NSRegularExpression.init(pattern:options:)`. Argument 0 of the call is
+ * reported as the string input and argument 1 as the options input.
66
+ */
67
+ private class NSRegularExpressionRegexCreation extends RegexCreation {
68
+ DataFlow:: Node input ;
69
+
70
+ NSRegularExpressionRegexCreation ( ) {
71
+ exists ( CallExpr call |
72
+ call .getStaticTarget ( )
73
+ .( Method )
74
+ .hasQualifiedName ( "NSRegularExpression" , "init(pattern:options:)" ) and
55
75
input .asExpr ( ) = call .getArgument ( 0 ) .getExpr ( ) and // argument 0 is the string input
56
76
this .asExpr ( ) = call // the creation node is the initializer call itself
57
77
)
58
78
}
59
79
60
80
override DataFlow:: Node getStringInput ( ) { result = input }
81
+
82
+ override DataFlow:: Node getOptionsInput ( ) {
83
+ result .asExpr ( ) = this .asExpr ( ) .( CallExpr ) .getArgument ( 1 ) .getExpr ( ) // argument 1 is the options input
84
+ }
85
+ }
86
+
87
+ private newtype TRegexParseMode = // one branch per parse mode flag; extended by the public class `RegexParseMode`
88
+ MkIgnoreCase ( ) or // case insensitive
89
+ MkVerbose ( ) or // ignores whitespace and `#` comments within patterns
90
+ MkDotAll ( ) or // dot matches all characters, including line terminators
91
+ MkMultiLine ( ) or // `^` and `$` also match beginning and end of lines
92
+ MkUnicode ( ) // Unicode UAX 29 word boundary mode
93
+
94
+ /**
95
+ * A regular expression parse mode flag.
+ *
+ * There is one value per branch of `TRegexParseMode`: ignore-case, verbose,
+ * dot-all, multi-line and Unicode.
96
+ */
97
+ class RegexParseMode extends TRegexParseMode {
98
+ /**
99
+ * Gets the name of this parse mode flag.
+ *
+ * The name is one of `"IGNORECASE"`, `"VERBOSE"`, `"DOTALL"`,
+ * `"MULTILINE"` or `"UNICODE"`.
100
+ */
101
+ string getName ( ) {
102
+ this = MkIgnoreCase ( ) and result = "IGNORECASE"
103
+ or
104
+ this = MkVerbose ( ) and result = "VERBOSE"
105
+ or
106
+ this = MkDotAll ( ) and result = "DOTALL"
107
+ or
108
+ this = MkMultiLine ( ) and result = "MULTILINE"
109
+ or
110
+ this = MkUnicode ( ) and result = "UNICODE"
111
+ }
112
+
113
+ /**
114
+ * Gets a textual representation of this `RegexParseMode`.
+ *
+ * This is the same string as `getName()`.
115
+ */
116
+ string toString ( ) { result = this .getName ( ) }
117
+ }
118
+
119
+ /**
120
+ * A unit class for adding additional flow steps for regular expressions.
+ *
+ * Both member predicates are abstract, so each subclass supplies its own
+ * flow steps and its own parse mode information.
121
+ */
122
+ class RegexAdditionalFlowStep extends Unit {
123
+ /**
124
+ * Holds if the step from `nodeFrom` to `nodeTo` should be considered a flow
125
+ * step for regular expressions.
126
+ */
127
+ abstract predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) ;
128
+
129
+ /**
130
+ * Holds if a regular expression parse mode is either set (`isSet` = true)
131
+ * or unset (`isSet` = false) at `node`. Parse modes propagate through
132
+ * array construction and regex construction.
133
+ */
134
+ abstract predicate setsParseMode ( DataFlow:: Node node , RegexParseMode mode , boolean isSet ) ;
135
+ }
136
+
137
+ /**
138
+ * An additional flow step for `Regex`.
+ *
+ * Models calls to the `Regex` methods `ignoresCase(_:)`,
+ * `dotMatchesNewlines(_:)` and `anchorsMatchLineEndings(_:)`. Both public
+ * predicates are projections of the private helper `setsParseModeEdge`:
+ * - `step` holds from the qualifier of such a call to the call itself;
+ * - `setsParseMode` holds at the call, for the mode matching the method.
+ *
+ * `isSet` is `false` only when the first argument is the boolean literal
+ * `false`; in every other case it is `true`.
139
+ */
140
+ class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
141
+ override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) {
142
+ this .setsParseModeEdge ( nodeFrom , nodeTo , _, _)
143
+ }
144
+
145
+ override predicate setsParseMode ( DataFlow:: Node node , RegexParseMode mode , boolean isSet ) {
146
+ this .setsParseModeEdge ( _, node , mode , isSet )
147
+ }
148
+
149
+ private predicate setsParseModeEdge (
150
+ DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo , RegexParseMode mode , boolean isSet
151
+ ) {
152
+ // `Regex` methods that modify the parse mode of an existing `Regex` object.
153
+ exists ( CallExpr ce |
154
+ nodeFrom .asExpr ( ) = ce .getQualifier ( ) and
155
+ nodeTo .asExpr ( ) = ce and
156
+ // decode the parse mode being set
157
+ (
158
+ ce .getStaticTarget ( ) .( Method ) .hasQualifiedName ( "Regex" , "ignoresCase(_:)" ) and
159
+ mode = MkIgnoreCase ( )
160
+ or
161
+ ce .getStaticTarget ( ) .( Method ) .hasQualifiedName ( "Regex" , "dotMatchesNewlines(_:)" ) and
162
+ mode = MkDotAll ( )
163
+ or
164
+ ce .getStaticTarget ( ) .( Method ) .hasQualifiedName ( "Regex" , "anchorsMatchLineEndings(_:)" ) and
165
+ mode = MkMultiLine ( )
166
+ ) and
167
+ // decode the value being set
168
+ if ce .getArgument ( 0 ) .getExpr ( ) .( BooleanLiteralExpr ) .getValue ( ) = false
169
+ then isSet = false // mode is set to false
170
+ else isSet = true // mode is set to true OR mode is set to default (=true) OR mode is set to an unknown value
171
+ )
172
+ }
173
+ }
174
+
175
+ /**
176
+ * An additional flow step for `NSRegularExpression`.
+ *
+ * This class contributes no flow steps (`step` is `none()`). Its
+ * `setsParseMode` holds, always with `isSet = true`, at member references
+ * to these `NSRegularExpression.Options` fields:
+ * - `caseInsensitive` (ignore-case mode)
+ * - `allowCommentsAndWhitespace` (verbose mode)
+ * - `dotMatchesLineSeparators` (dot-all mode)
+ * - `anchorsMatchLines` (multi-line mode)
+ * - `useUnicodeWordBoundaries` (Unicode mode)
177
+ */
178
+ class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
179
+ override predicate step ( DataFlow:: Node nodeFrom , DataFlow:: Node nodeTo ) { none ( ) }
180
+
181
+ override predicate setsParseMode ( DataFlow:: Node node , RegexParseMode mode , boolean isSet ) {
182
+ // `NSRegularExpression.Options` values (these are typically combined, then passed into
183
+ // the `NSRegularExpression` initializer).
184
+ node .asExpr ( )
185
+ .( MemberRefExpr )
186
+ .getMember ( )
187
+ .( FieldDecl )
188
+ .hasQualifiedName ( "NSRegularExpression.Options" , "caseInsensitive" ) and
189
+ mode = MkIgnoreCase ( ) and
190
+ isSet = true
191
+ or
192
+ node .asExpr ( )
193
+ .( MemberRefExpr )
194
+ .getMember ( )
195
+ .( FieldDecl )
196
+ .hasQualifiedName ( "NSRegularExpression.Options" , "allowCommentsAndWhitespace" ) and
197
+ mode = MkVerbose ( ) and
198
+ isSet = true
199
+ or
200
+ node .asExpr ( )
201
+ .( MemberRefExpr )
202
+ .getMember ( )
203
+ .( FieldDecl )
204
+ .hasQualifiedName ( "NSRegularExpression.Options" , "dotMatchesLineSeparators" ) and
205
+ mode = MkDotAll ( ) and
206
+ isSet = true
207
+ or
208
+ node .asExpr ( )
209
+ .( MemberRefExpr )
210
+ .getMember ( )
211
+ .( FieldDecl )
212
+ .hasQualifiedName ( "NSRegularExpression.Options" , "anchorsMatchLines" ) and
213
+ mode = MkMultiLine ( ) and
214
+ isSet = true
215
+ or
216
+ node .asExpr ( )
217
+ .( MemberRefExpr )
218
+ .getMember ( )
219
+ .( FieldDecl )
220
+ .hasQualifiedName ( "NSRegularExpression.Options" , "useUnicodeWordBoundaries" ) and
221
+ mode = MkUnicode ( ) and
222
+ isSet = true
223
+ }
61
224
}
62
225
63
226
/**
@@ -91,6 +254,19 @@ abstract class RegexEval extends CallExpr {
91
254
RegexUseFlow:: flow ( regexCreation , DataFlow:: exprNode ( this .getRegexInput ( ) ) )
92
255
)
93
256
}
257
+
258
+ /**
259
+ * Gets a parse mode that is set at this evaluation (in at least one path
260
+ * from the creation of the regular expression object).
+ *
+ * Only nodes where some `RegexAdditionalFlowStep` reports the mode with
+ * `isSet = true` are used as starting points, and such a node must reach
+ * the regex input of this evaluation according to `RegexParseModeFlow`.
+ * NOTE(review): whether a later unset (`isSet = false`) of the same mode
+ * blocks that flow depends on `RegexParseModeFlow`, which is not visible
+ * here - confirm against its configuration.
261
+ */
262
+ RegexParseMode getAParseMode ( ) {
263
+ exists ( DataFlow:: Node setNode |
264
+ // parse mode flag is set
265
+ any ( RegexAdditionalFlowStep s ) .setsParseMode ( setNode , result , true ) and
266
+ // reaches this eval
267
+ RegexParseModeFlow:: flow ( setNode , DataFlow:: exprNode ( this .getRegexInput ( ) ) )
268
+ )
269
+ }
94
270
}
95
271
96
272
/**
0 commit comments