@@ -1114,4 +1114,64 @@ module RegExp {
1114
1114
or
1115
1115
result = node .asExpr ( ) .( StringLiteral ) .asRegExp ( )
1116
1116
}
1117
+
1118
/**
 * A character, represented as a string, that `RegExp::alwaysMatchesMetaCharacter` analyzes.
 *
 * At present the meta-characters are `<`, `'`, and `"`; subclass this class to have
 * further characters treated as meta-characters.
 */
abstract class MetaCharacter extends string {
  bindingset[this]
  MetaCharacter() { any() }

  /**
   * Holds if `term`, taken as an atomic term, matches this meta-character.
   *
   * Derived terms such as alternatives and groups are not handled by this predicate.
   *
   * The default implementation treats `.`, `\W`, `\S`, and `\D` as matching every
   * meta-character; override the predicate for a meta-character where that is not true.
   */
  predicate matchedByAtom(RegExpTerm term) {
    // The dot atom.
    term instanceof RegExpDot
    or
    // A constant whose value is exactly this character.
    exists(RegExpConstant constant | constant = term | constant.getConstantValue() = this)
    or
    // One of the escapes `\W`, `\S`, or `\D`.
    exists(RegExpCharacterClassEscape escape | escape = term |
      escape.getValue() = "\\W" or
      escape.getValue() = "\\S" or
      escape.getValue() = "\\D"
    )
    or
    // A character range whose bounds enclose this character (string comparison).
    exists(RegExpCharacterRange range, string lo, string hi | range = term |
      range.isRange(lo, hi) and
      this >= lo and
      this <= hi
    )
  }
}
1150
+
1151
/** One of the meta-characters `<`, `'`, or `"`. */
private class DefaultMetaCharacter extends MetaCharacter {
  DefaultMetaCharacter() {
    this = "<" or
    this = "'" or
    this = "\""
  }
}
1154
+
1155
/**
 * Holds if `term` can match any occurrence of `char` within a string, without taking
 * into account the context in which `term` appears.
 *
 * This predicate is an under-approximation: a sequence is never considered to
 * guarantee a match.
 */
predicate alwaysMatchesMetaCharacter(RegExpTerm term, MetaCharacter char) {
  // Atomic case. Terms whose parent is a sequence are left out to restrict the size of the predicate.
  char.matchedByAtom(term) and
  not term.getParent() instanceof RegExpSequence
  or
  // A group matches if one of its children does.
  exists(RegExpGroup group | group = term |
    alwaysMatchesMetaCharacter(group.getAChild(), char)
  )
  or
  // An alternation matches if one of its alternatives does.
  exists(RegExpAlt alt | alt = term | alwaysMatchesMetaCharacter(alt.getAlternative(), char))
  or
  // A character class: some child matches, or, when inverted, no child matches.
  exists(RegExpCharacterClass class_ | class_ = term |
    if class_.isInverted()
    then not char.matchedByAtom(class_.getAChild())
    else char.matchedByAtom(class_.getAChild())
  )
}
1117
1177
}
0 commit comments