@@ -4,6 +4,10 @@ private import internal.ParseRegExp
4
4
private import codeql.NumberUtils
5
5
private import codeql.ruby.ast.Literal as Ast
6
6
private import codeql.Locations
7
+ private import codeql.regex.nfa.NfaUtils as NfaUtils
8
+ private import codeql.regex.RegexTreeView
9
+ // Export the implementation both under the name `RegexTreeView` and in the top-level scope.
10
+ import Impl as RegexTreeView
7
11
import Impl
8
12
9
13
/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
@@ -52,7 +56,7 @@ private newtype TRegExpParent =
52
56
}
53
57
54
58
/** An implementation that satisfies the RegexTreeView signature. */
55
- private module Impl {
59
+ private module Impl implements RegexTreeViewSig {
56
60
/**
57
61
* An element containing a regular expression term, that is, either
58
62
* a string literal (parsed as a regular expression)
@@ -1157,4 +1161,67 @@ private module Impl {
1157
1161
*/
1158
1162
predicate isInverted ( ) { re .namedCharacterPropertyIsInverted ( start , end ) }
1159
1163
}
1164
+
1165
+ class Top = RegExpParent ; // Alias exposing `RegExpParent` as `Top`; presumably the name `RegexTreeViewSig` expects (confirm against the signature).
1166
+
1167
+ /**
1168
+ * Holds if `term` is an escape class representing e.g. `\d`, or a named
+ * character property whose name is `digit`, `space` or `word`
+ * (the name is compared after lower-casing).
1169
+ * `clazz` is which character class it represents, e.g. "d" for `\d`.
+ * For a character class escape this is the escape's own value; for a named
+ * property it is "d", "s" or "w", upper-cased ("D", "S", "W") when the
+ * property is inverted.
1170
+ */
1171
+ predicate isEscapeClass ( RegExpTerm term , string clazz ) {
1172
+ exists ( RegExpCharacterClassEscape escape | term = escape | escape .getValue ( ) = clazz )
1173
+ or
1174
+ // TODO: expand to cover more named properties; only `digit`, `space` and `word` are mapped so far
1175
+ exists ( RegExpNamedCharacterProperty escape | term = escape |
1176
+ escape .getName ( ) .toLowerCase ( ) = "digit" and
1177
+ if escape .isInverted ( ) then clazz = "D" else clazz = "d"
1178
+ or
1179
+ escape .getName ( ) .toLowerCase ( ) = "space" and
1180
+ if escape .isInverted ( ) then clazz = "S" else clazz = "s"
1181
+ or
1182
+ escape .getName ( ) .toLowerCase ( ) = "word" and
1183
+ if escape .isInverted ( ) then clazz = "W" else clazz = "w"
1184
+ )
1185
+ }
1186
+
1187
+ /**
1188
+ * Holds if the regular expression should not be considered.
+ * This is the case when `parent` is a term whose regular expression is a
+ * literal with the free-spacing flag set.
1189
+ */
1190
+ predicate isExcluded ( RegExpParent parent ) {
1191
+ parent .( RegExpTerm ) .getRegExp ( ) .( Ast:: RegExpLiteral ) .hasFreeSpacingFlag ( ) // exclude free-spacing mode regexes
1192
+ }
1193
+
1194
+ /**
1195
+ * Holds if `term` is a possessive quantifier.
1196
+ * Not currently implemented, so this never holds (`none()`);
+ * the predicate exists because it is used by the shared library.
1197
+ */
1198
+ predicate isPossessive ( RegExpQuantifier term ) { none ( ) }
1199
+
1200
+ /**
1201
+ * Holds if the regex that `term` is part of is used in a way that ignores any leading prefix of the input it's matched against.
1202
+ * Not yet implemented for Ruby: the predicate currently holds for every term (`any()`).
1203
+ */
1204
+ predicate matchesAnyPrefix ( RegExpTerm term ) { any ( ) }
1205
+
1206
+ /**
1207
+ * Holds if the regex that `term` is part of is used in a way that ignores any trailing suffix of the input it's matched against.
1208
+ * Not yet implemented for Ruby: the predicate currently holds for every term (`any()`).
1209
+ */
1210
+ predicate matchesAnySuffix ( RegExpTerm term ) { any ( ) }
1211
+
1212
+ /**
1213
+ * Holds if `root` is a root term whose literal has the `i` flag for case-insensitive matching.
+ * Never holds for terms that are not root terms.
1214
+ */
1215
+ predicate isIgnoreCase ( RegExpTerm root ) {
1216
+ root .isRootTerm ( ) and
1217
+ root .getLiteral ( ) .isIgnoreCase ( )
1218
+ }
1219
+
1220
+ /**
1221
+ * Holds if `root` is a root term whose literal is in dot-all mode, as reported by `isDotAll()` on the literal.
+ * Never holds for terms that are not root terms.
+ * NOTE(review): this comment previously named the `s` flag; in Ruby the flag that lets `.` match
+ * newlines is spelled `m` - confirm which flag `isDotAll()` on the literal actually checks.
1222
+ */
1223
+ predicate isDotAll ( RegExpTerm root ) {
1224
+ root .isRootTerm ( ) and
1225
+ root .getLiteral ( ) .isDotAll ( )
1226
+ }
1160
1227
}
0 commit comments