@@ -838,38 +838,32 @@ predicate isStartState(State state) {
838
838
}
839
839
840
840
/**
841
- * A configuration for which parts of a regular expression should be considered relevant for
842
- * the different predicates in `ReDoS.qll`.
843
- * Used to adjust the computations for either superlinear or exponential backtracking.
841
+ * Holds if `state` is a candidate for ReDoS with string `pump`.
844
842
*/
845
- abstract class ReDoSConfiguration extends string {
846
- bindingset [ this ]
847
- ReDoSConfiguration ( ) { any ( ) }
843
+ signature predicate isCandidateSig ( State state , string pump ) ;
848
844
849
- /**
850
- * Holds if `state` with the pump string `pump` is a candidate for a
851
- * ReDoS vulnerable state.
852
- * This is used to determine which states are considered for the prefix/suffix construction.
853
- */
854
- abstract predicate isReDoSCandidate ( State state , string pump ) ;
855
- }
856
-
857
- module ReDoSPruning {
845
+ /**
846
+ * A module for pruning candidate ReDoS states.
847
+ * The candidates are specified by the `isCandidate` signature predicate.
848
+ * The candidates are checked for rejecting suffixes and deduplicated,
849
+ * and the resulting ReDoS states are read by the `hasReDoSResult` predicate.
850
+ */
851
+ module ReDoSPruning< isCandidateSig / 2 isCandidate> {
858
852
/**
859
853
* Holds if repeating `pump` starting at `state` is a candidate for causing backtracking.
860
854
* No check whether a rejected suffix exists has been made.
861
855
*/
862
856
private predicate isReDoSCandidate ( State state , string pump ) {
863
- any ( ReDoSConfiguration conf ) . isReDoSCandidate ( state , pump ) and
857
+ isCandidate ( state , pump ) and
864
858
(
865
- not any ( ReDoSConfiguration conf ) . isReDoSCandidate ( epsilonSucc + ( state ) , _)
859
+ not isCandidate ( epsilonSucc + ( state ) , _)
866
860
or
867
861
epsilonSucc + ( state ) = state and
868
862
state =
869
863
max ( State s , Location l |
870
864
s = epsilonSucc + ( state ) and
871
865
l = s .getRepr ( ) .getLocation ( ) and
872
- any ( ReDoSConfiguration conf ) . isReDoSCandidate ( s , _) and
866
+ isCandidate ( s , _) and
873
867
s .getRepr ( ) instanceof InfiniteRepetitionQuantifier
874
868
|
875
869
s order by l .getStartLine ( ) , l .getStartColumn ( ) , l .getEndColumn ( ) , l .getEndLine ( )
@@ -887,8 +881,11 @@ module ReDoSPruning {
887
881
private predicate lastStartState ( State state ) {
888
882
exists ( RegExpRoot root |
889
883
state =
890
- max ( StateInPumpableRegexp s , Location l |
891
- isStartState ( s ) and getRoot ( s .getRepr ( ) ) = root and l = s .getRepr ( ) .getLocation ( )
884
+ max ( State s , Location l |
885
+ s = stateInPumpableRegexp ( ) and
886
+ isStartState ( s ) and
887
+ getRoot ( s .getRepr ( ) ) = root and
888
+ l = s .getRepr ( ) .getLocation ( )
892
889
|
893
890
s
894
891
order by
@@ -957,14 +954,10 @@ module ReDoSPruning {
957
954
min ( string c | delta ( prev , any ( InputSymbol symbol | c = intersect ( Any ( ) , symbol ) ) , next ) )
958
955
}
959
956
960
- /**
961
- * A state within a regular expression that has a pumpable state.
962
- */
963
- class StateInPumpableRegexp extends State {
964
- pragma [ noinline]
965
- StateInPumpableRegexp ( ) {
966
- exists ( State s | isReDoSCandidate ( s , _) | getRoot ( s .getRepr ( ) ) = getRoot ( this .getRepr ( ) ) )
967
- }
957
+ /** Gets a state within a regular expression that has a pumpable state. */
958
+ pragma [ noinline]
959
+ State stateInPumpableRegexp ( ) {
960
+ exists ( State s | isReDoSCandidate ( s , _) | getRoot ( s .getRepr ( ) ) = getRoot ( result .getRepr ( ) ) )
968
961
}
969
962
}
970
963
@@ -1002,26 +995,32 @@ module ReDoSPruning {
1002
995
* This predicate might find impossible suffixes when searching for suffixes of length > 1, which can cause FPs.
1003
996
*/
1004
997
pragma [ noinline]
1005
- private predicate isLikelyRejectable ( StateInPumpableRegexp s ) {
1006
- // exists a reject edge with some char.
1007
- hasRejectEdge ( s )
1008
- or
1009
- hasEdgeToLikelyRejectable ( s )
1010
- or
1011
- // stopping here is rejection
1012
- isRejectState ( s )
998
+ private predicate isLikelyRejectable ( State s ) {
999
+ s = stateInPumpableRegexp ( ) and
1000
+ (
1001
+ // exists a reject edge with some char.
1002
+ hasRejectEdge ( s )
1003
+ or
1004
+ hasEdgeToLikelyRejectable ( s )
1005
+ or
1006
+ // stopping here is rejection
1007
+ isRejectState ( s )
1008
+ )
1013
1009
}
1014
1010
1015
1011
/**
1016
1012
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
1017
1013
*/
1018
- predicate isRejectState ( StateInPumpableRegexp s ) { not epsilonSucc * ( s ) = Accept ( _) }
1014
+ predicate isRejectState ( State s ) {
1015
+ s = stateInPumpableRegexp ( ) and not epsilonSucc * ( s ) = Accept ( _)
1016
+ }
1019
1017
1020
1018
/**
1021
1019
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
1022
1020
*/
1023
1021
pragma [ noopt]
1024
- predicate hasEdgeToLikelyRejectable ( StateInPumpableRegexp s ) {
1022
+ predicate hasEdgeToLikelyRejectable ( State s ) {
1023
+ s = stateInPumpableRegexp ( ) and
1025
1024
// all edges (at least one) with some char leads to another state that is rejectable.
1026
1025
// the `next` states might not share a common suffix, which can cause FPs.
1027
1026
exists ( string char | char = hasEdgeToLikelyRejectableHelper ( s ) |
@@ -1036,7 +1035,8 @@ module ReDoSPruning {
1036
1035
* and `s` has not been found to be rejectable by `hasRejectEdge` or `isRejectState`.
1037
1036
*/
1038
1037
pragma [ noinline]
1039
- private string hasEdgeToLikelyRejectableHelper ( StateInPumpableRegexp s ) {
1038
+ private string hasEdgeToLikelyRejectableHelper ( State s ) {
1039
+ s = stateInPumpableRegexp ( ) and
1040
1040
not hasRejectEdge ( s ) and
1041
1041
not isRejectState ( s ) and
1042
1042
deltaClosedChar ( s , result , _)
@@ -1047,7 +1047,9 @@ module ReDoSPruning {
1047
1047
* along epsilon edges, such that there is a transition from
1048
1048
* `prev` to `next` that the character symbol `char`.
1049
1049
*/
1050
- predicate deltaClosedChar ( StateInPumpableRegexp prev , string char , StateInPumpableRegexp next ) {
1050
+ predicate deltaClosedChar ( State prev , string char , State next ) {
1051
+ prev = stateInPumpableRegexp ( ) and
1052
+ next = stateInPumpableRegexp ( ) and
1051
1053
deltaClosed ( prev , getAnInputSymbolMatchingRelevant ( char ) , next )
1052
1054
}
1053
1055
@@ -1148,7 +1150,7 @@ module ReDoSPruning {
1148
1150
exists ( int i , string c | s = Match ( term , i ) |
1149
1151
c =
1150
1152
min ( string w |
1151
- any ( ReDoSConfiguration conf ) . isReDoSCandidate ( s , w ) and
1153
+ isCandidate ( s , w ) and
1152
1154
SuffixConstruction:: reachesOnlyRejectableSuffixes ( s , w )
1153
1155
|
1154
1156
w order by w .length ( ) , w
@@ -1173,28 +1175,28 @@ module ReDoSPruning {
1173
1175
not exists ( PrefixConstruction:: prefix ( s ) ) and prefixMsg = ""
1174
1176
)
1175
1177
}
1176
- }
1177
1178
1178
- /**
1179
- * Gets the result of backslash-escaping newlines, carriage-returns and
1180
- * backslashes in `s`.
1181
- */
1182
- bindingset [ s]
1183
- private string escape ( string s ) {
1184
- result =
1185
- s .replaceAll ( "\\" , "\\\\" )
1186
- .replaceAll ( "\n" , "\\n" )
1187
- .replaceAll ( "\r" , "\\r" )
1188
- .replaceAll ( "\t" , "\\t" )
1189
- }
1179
+ /**
1180
+ * Gets the result of backslash-escaping newlines, carriage-returns and
1181
+ * backslashes in `s`.
1182
+ */
1183
+ bindingset [ s]
1184
+ private string escape ( string s ) {
1185
+ result =
1186
+ s .replaceAll ( "\\" , "\\\\" )
1187
+ .replaceAll ( "\n" , "\\n" )
1188
+ .replaceAll ( "\r" , "\\r" )
1189
+ .replaceAll ( "\t" , "\\t" )
1190
+ }
1190
1191
1191
- /**
1192
- * Gets `str` with the last `i` characters moved to the front.
1193
- *
1194
- * We use this to adjust the pump string to match with the beginning of
1195
- * a RegExpTerm, so it doesn't start in the middle of a constant.
1196
- */
1197
- bindingset [ str, i]
1198
- private string rotate ( string str , int i ) {
1199
- result = str .suffix ( str .length ( ) - i ) + str .prefix ( str .length ( ) - i )
1192
+ /**
1193
+ * Gets `str` with the last `i` characters moved to the front.
1194
+ *
1195
+ * We use this to adjust the pump string to match with the beginning of
1196
+ * a RegExpTerm, so it doesn't start in the middle of a constant.
1197
+ */
1198
+ bindingset [ str, i]
1199
+ private string rotate ( string str , int i ) {
1200
+ result = str .suffix ( str .length ( ) - i ) + str .prefix ( str .length ( ) - i )
1201
+ }
1200
1202
}
0 commit comments