Skip to content

Commit 473bc92

Browse files
committed
move the PrefixConstruction module out of the ReDoSPruning module
1 parent 70ec709 commit 473bc92

File tree

4 files changed

+428
-400
lines changed

4 files changed

+428
-400
lines changed

java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll

Lines changed: 107 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,101 @@ predicate isStartState(State state) {
877877
*/
878878
signature predicate isCandidateSig(State state, string pump);
879879

880+
/**
881+
* Holds if `state` is a candidate for ReDoS.
882+
*/
883+
signature predicate isCandidateSig(State state);
884+
885+
/**
886+
* Predicates for constructing a prefix string that leads to a given state.
887+
*/
888+
module PrefixConstruction<isCandidateSig/1 isCandidate> {
889+
/**
890+
* Holds if `state` is the textually last start state for the regular expression.
891+
*/
892+
private predicate lastStartState(State state) {
893+
exists(RegExpRoot root |
894+
state =
895+
max(State s, Location l |
896+
s = stateInPumpableRegexp() and
897+
isStartState(s) and
898+
getRoot(s.getRepr()) = root and
899+
l = s.getRepr().getLocation()
900+
|
901+
s
902+
order by
903+
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
904+
l.getEndLine()
905+
)
906+
)
907+
}
908+
909+
/**
910+
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
911+
*/
912+
private predicate existsTransition(State a, State b) { delta(a, _, b) }
913+
914+
/**
915+
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
916+
*/
917+
int prefixLength(State start, State state) =
918+
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
919+
920+
/**
921+
* Gets the minimum number of transitions it takes to reach `state` from the start state.
922+
*/
923+
private int lengthFromStart(State state) { result = prefixLength(_, state) }
924+
925+
/**
926+
* Gets a string for which the regular expression will reach `state`.
927+
*
928+
* Has at most one result for any given `state`.
929+
* This predicate will not always have a result even if there is a ReDoS issue in
930+
* the regular expression.
931+
*/
932+
string prefix(State state) {
933+
lastStartState(state) and
934+
result = ""
935+
or
936+
// the search stops past the last redos candidate state.
937+
lengthFromStart(state) <= max(lengthFromStart(any(State s | isCandidate(s)))) and
938+
exists(State prev |
939+
// select a unique predecessor (by an arbitrary measure)
940+
prev =
941+
min(State s, Location loc |
942+
lengthFromStart(s) = lengthFromStart(state) - 1 and
943+
loc = s.getRepr().getLocation() and
944+
delta(s, _, state)
945+
|
946+
s
947+
order by
948+
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
949+
s.getRepr().toString()
950+
)
951+
|
952+
// greedy search for the shortest prefix
953+
result = prefix(prev) and delta(prev, Epsilon(), state)
954+
or
955+
not delta(prev, Epsilon(), state) and
956+
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
957+
)
958+
}
959+
960+
/**
961+
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
962+
*/
963+
private string getCanonicalEdgeChar(State prev, State next) {
964+
result =
965+
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
966+
}
967+
968+
/** Gets a state within a regular expression that has a pumpable state. */
969+
pragma[noinline]
970+
State stateInPumpableRegexp() {
971+
exists(State s | isCandidate(s) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
972+
}
973+
}
974+
880975
/**
881976
* A module for pruning candidate ReDoS states.
882977
* The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1005,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
9101005
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
9111006
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
9121007

913-
/**
914-
* Predicates for constructing a prefix string that leads to a given state.
915-
*/
916-
private module PrefixConstruction {
917-
/**
918-
* Holds if `state` is the textually last start state for the regular expression.
919-
*/
920-
private predicate lastStartState(State state) {
921-
exists(RegExpRoot root |
922-
state =
923-
max(State s, Location l |
924-
s = stateInPumpableRegexp() and
925-
isStartState(s) and
926-
getRoot(s.getRepr()) = root and
927-
l = s.getRepr().getLocation()
928-
|
929-
s
930-
order by
931-
l.getStartLine(), l.getStartColumn(), s.getRepr().toString(), l.getEndColumn(),
932-
l.getEndLine()
933-
)
934-
)
935-
}
1008+
predicate isCandidateState(State s) { isReDoSCandidate(s, _) }
9361009

937-
/**
938-
* Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
939-
*/
940-
private predicate existsTransition(State a, State b) { delta(a, _, b) }
941-
942-
/**
943-
* Gets the minimum number of transitions it takes to reach `state` from the `start` state.
944-
*/
945-
int prefixLength(State start, State state) =
946-
shortestDistances(lastStartState/1, existsTransition/2)(start, state, result)
947-
948-
/**
949-
* Gets the minimum number of transitions it takes to reach `state` from the start state.
950-
*/
951-
private int lengthFromStart(State state) { result = prefixLength(_, state) }
952-
953-
/**
954-
* Gets a string for which the regular expression will reach `state`.
955-
*
956-
* Has at most one result for any given `state`.
957-
* This predicate will not always have a result even if there is a ReDoS issue in
958-
* the regular expression.
959-
*/
960-
string prefix(State state) {
961-
lastStartState(state) and
962-
result = ""
963-
or
964-
// the search stops past the last redos candidate state.
965-
lengthFromStart(state) <= max(lengthFromStart(any(State s | isReDoSCandidate(s, _)))) and
966-
exists(State prev |
967-
// select a unique predecessor (by an arbitrary measure)
968-
prev =
969-
min(State s, Location loc |
970-
lengthFromStart(s) = lengthFromStart(state) - 1 and
971-
loc = s.getRepr().getLocation() and
972-
delta(s, _, state)
973-
|
974-
s
975-
order by
976-
loc.getStartLine(), loc.getStartColumn(), loc.getEndLine(), loc.getEndColumn(),
977-
s.getRepr().toString()
978-
)
979-
|
980-
// greedy search for the shortest prefix
981-
result = prefix(prev) and delta(prev, Epsilon(), state)
982-
or
983-
not delta(prev, Epsilon(), state) and
984-
result = prefix(prev) + getCanonicalEdgeChar(prev, state)
985-
)
986-
}
987-
988-
/**
989-
* Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
990-
*/
991-
private string getCanonicalEdgeChar(State prev, State next) {
992-
result =
993-
min(string c | delta(prev, any(InputSymbol symbol | c = intersect(Any(), symbol)), next))
994-
}
995-
996-
/** Gets a state within a regular expression that has a pumpable state. */
997-
pragma[noinline]
998-
State stateInPumpableRegexp() {
999-
exists(State s | isReDoSCandidate(s, _) | getRoot(s.getRepr()) = getRoot(result.getRepr()))
1000-
}
1001-
}
1010+
import PrefixConstruction<isCandidateState/1> as Prefix
10021011

10031012
/**
10041013
* Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1027,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10181027
* using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
10191028
*/
10201029
private module SuffixConstruction {
1021-
import PrefixConstruction
1022-
10231030
/**
10241031
* Holds if all states reachable from `fork` by repeating `w`
10251032
* are likely rejectable by appending some suffix.
@@ -1036,7 +1043,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10361043
*/
10371044
pragma[noinline]
10381045
private predicate isLikelyRejectable(State s) {
1039-
s = stateInPumpableRegexp() and
1046+
s = Prefix::stateInPumpableRegexp() and
10401047
(
10411048
// exists a reject edge with some char.
10421049
hasRejectEdge(s)
@@ -1052,15 +1059,15 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10521059
* Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
10531060
*/
10541061
predicate isRejectState(State s) {
1055-
s = stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
1062+
s = Prefix::stateInPumpableRegexp() and not epsilonSucc*(s) = Accept(_)
10561063
}
10571064

10581065
/**
10591066
* Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
10601067
*/
10611068
pragma[noopt]
10621069
predicate hasEdgeToLikelyRejectable(State s) {
1063-
s = stateInPumpableRegexp() and
1070+
s = Prefix::stateInPumpableRegexp() and
10641071
// all edges (at least one) with some char leads to another state that is rejectable.
10651072
// the `next` states might not share a common suffix, which can cause FPs.
10661073
exists(string char | char = hasEdgeToLikelyRejectableHelper(s) |
@@ -1076,7 +1083,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10761083
*/
10771084
pragma[noinline]
10781085
private string hasEdgeToLikelyRejectableHelper(State s) {
1079-
s = stateInPumpableRegexp() and
1086+
s = Prefix::stateInPumpableRegexp() and
10801087
not hasRejectEdge(s) and
10811088
not isRejectState(s) and
10821089
deltaClosedChar(s, result, _)
@@ -1088,8 +1095,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10881095
* `prev` to `next` that the character symbol `char`.
10891096
*/
10901097
predicate deltaClosedChar(State prev, string char, State next) {
1091-
prev = stateInPumpableRegexp() and
1092-
next = stateInPumpableRegexp() and
1098+
prev = Prefix::stateInPumpableRegexp() and
1099+
next = Prefix::stateInPumpableRegexp() and
10931100
deltaClosed(prev, getAnInputSymbolMatchingRelevant(char), next)
10941101
}
10951102

@@ -1208,12 +1215,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
12081215
predicate hasReDoSResult(RegExpTerm t, string pump, State s, string prefixMsg) {
12091216
isReDoSAttackable(t, pump, s) and
12101217
(
1211-
prefixMsg = "starting with '" + escape(PrefixConstruction::prefix(s)) + "' and " and
1212-
not PrefixConstruction::prefix(s) = ""
1218+
prefixMsg = "starting with '" + escape(Prefix::prefix(s)) + "' and " and
1219+
not Prefix::prefix(s) = ""
12131220
or
1214-
PrefixConstruction::prefix(s) = "" and prefixMsg = ""
1221+
Prefix::prefix(s) = "" and prefixMsg = ""
12151222
or
1216-
not exists(PrefixConstruction::prefix(s)) and prefixMsg = ""
1223+
not exists(Prefix::prefix(s)) and prefixMsg = ""
12171224
)
12181225
}
12191226

0 commit comments

Comments
 (0)