Skip to content

Commit be37763

Browse files
committed
improve performance of process() by pruning accept states early
1 parent bf20b7d commit be37763

File tree

4 files changed

+28
-4
lines changed

4 files changed

+28
-4
lines changed

java/ql/lib/semmle/code/java/security/regexp/NfaUtils.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
890890
*/
891891
private predicate isReDoSCandidate(State state, string pump) {
892892
isCandidate(state, pump) and
893+
not epsilonSucc*(state) = AcceptAnySuffix(_) and // pruning early - these can never get stuck in a rejecting state.
893894
(
894895
not isCandidate(epsilonSucc+(state), _)
895896
or
@@ -1022,7 +1023,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10221023
*/
10231024
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
10241025
isReDoSCandidate(fork, w) and
1025-
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
1026+
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
1027+
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
10261028
}
10271029

10281030
/**
@@ -1135,6 +1137,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11351137
pragma[noopt]
11361138
private State process(State fork, string w, int i) {
11371139
exists(State prev | prev = getProcessPrevious(fork, i, w) |
1140+
not prev = acceptsAnySuffix() and // we stop `process(..)` early if we can. If the successor accepts any suffix, then we know it can never be rejected.
11381141
exists(string char, InputSymbol sym |
11391142
char = w.charAt(i) and
11401143
deltaClosed(prev, sym, result) and
@@ -1145,6 +1148,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11451148
)
11461149
}
11471150

1151+
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
1152+
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
1153+
11481154
/**
11491155
* Gets a state that can be reached from pumpable `fork` consuming all
11501156
* chars in `w` any number of times followed by the first `i` characters of `w`.

javascript/ql/lib/semmle/javascript/security/regexp/NfaUtils.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
890890
*/
891891
private predicate isReDoSCandidate(State state, string pump) {
892892
isCandidate(state, pump) and
893+
not epsilonSucc*(state) = AcceptAnySuffix(_) and // pruning early - these can never get stuck in a rejecting state.
893894
(
894895
not isCandidate(epsilonSucc+(state), _)
895896
or
@@ -1022,7 +1023,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10221023
*/
10231024
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
10241025
isReDoSCandidate(fork, w) and
1025-
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
1026+
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
1027+
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
10261028
}
10271029

10281030
/**
@@ -1135,6 +1137,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11351137
pragma[noopt]
11361138
private State process(State fork, string w, int i) {
11371139
exists(State prev | prev = getProcessPrevious(fork, i, w) |
1140+
not prev = acceptsAnySuffix() and // we stop `process(..)` early if we can. If the successor accepts any suffix, then we know it can never be rejected.
11381141
exists(string char, InputSymbol sym |
11391142
char = w.charAt(i) and
11401143
deltaClosed(prev, sym, result) and
@@ -1145,6 +1148,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11451148
)
11461149
}
11471150

1151+
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
1152+
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
1153+
11481154
/**
11491155
* Gets a state that can be reached from pumpable `fork` consuming all
11501156
* chars in `w` any number of times followed by the first `i` characters of `w`.

python/ql/lib/semmle/python/security/regexp/NfaUtils.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
890890
*/
891891
private predicate isReDoSCandidate(State state, string pump) {
892892
isCandidate(state, pump) and
893+
not epsilonSucc*(state) = AcceptAnySuffix(_) and // pruning early - these can never get stuck in a rejecting state.
893894
(
894895
not isCandidate(epsilonSucc+(state), _)
895896
or
@@ -1022,7 +1023,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10221023
*/
10231024
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
10241025
isReDoSCandidate(fork, w) and
1025-
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
1026+
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
1027+
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
10261028
}
10271029

10281030
/**
@@ -1135,6 +1137,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11351137
pragma[noopt]
11361138
private State process(State fork, string w, int i) {
11371139
exists(State prev | prev = getProcessPrevious(fork, i, w) |
1140+
not prev = acceptsAnySuffix() and // we stop `process(..)` early if we can. If the successor accepts any suffix, then we know it can never be rejected.
11381141
exists(string char, InputSymbol sym |
11391142
char = w.charAt(i) and
11401143
deltaClosed(prev, sym, result) and
@@ -1145,6 +1148,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11451148
)
11461149
}
11471150

1151+
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
1152+
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
1153+
11481154
/**
11491155
* Gets a state that can be reached from pumpable `fork` consuming all
11501156
* chars in `w` any number of times followed by the first `i` characters of `w`.

ruby/ql/lib/codeql/ruby/security/regexp/NfaUtils.qll

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
890890
*/
891891
private predicate isReDoSCandidate(State state, string pump) {
892892
isCandidate(state, pump) and
893+
not epsilonSucc*(state) = AcceptAnySuffix(_) and // pruning early - these can never get stuck in a rejecting state.
893894
(
894895
not isCandidate(epsilonSucc+(state), _)
895896
or
@@ -1022,7 +1023,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10221023
*/
10231024
predicate reachesOnlyRejectableSuffixes(State fork, string w) {
10241025
isReDoSCandidate(fork, w) and
1025-
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next))
1026+
forex(State next | next = process(fork, w, w.length() - 1) | isLikelyRejectable(next)) and
1027+
not epsilonSucc*(getProcessPrevious(fork, _, w)) = AcceptAnySuffix(_) // we stop `process(..)` early if we can, check here if it happened.
10261028
}
10271029

10281030
/**
@@ -1135,6 +1137,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11351137
pragma[noopt]
11361138
private State process(State fork, string w, int i) {
11371139
exists(State prev | prev = getProcessPrevious(fork, i, w) |
1140+
not prev = acceptsAnySuffix() and // we stop `process(..)` early if we can. If the successor accepts any suffix, then we know it can never be rejected.
11381141
exists(string char, InputSymbol sym |
11391142
char = w.charAt(i) and
11401143
deltaClosed(prev, sym, result) and
@@ -1145,6 +1148,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
11451148
)
11461149
}
11471150

1151+
/** Gets a state that can reach the `accept-any` state using only epsilon steps. */
1152+
private State acceptsAnySuffix() { epsilonSucc*(result) = AcceptAnySuffix(_) }
1153+
11481154
/**
11491155
* Gets a state that can be reached from pumpable `fork` consuming all
11501156
* chars in `w` any number of times followed by the first `i` characters of `w`.

0 commit comments

Comments
 (0)