Skip to content

Commit 92db7b0

Browse files
committed
escape unicode chars in the output for the ReDoS queries
1 parent 022a066 commit 92db7b0

File tree

6 files changed

+78
-18
lines changed

6 files changed

+78
-18
lines changed

javascript/ql/test/query-tests/Security/CWE-400/ReDoS/PolynomialBackTracking.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@
445445
| tst.js:146:15:146:21 | (\\d\|5)* | Strings with many repetitions of '0' can start matching anywhere after the start of the preceeding ((\\d\|5)*)" |
446446
| tst.js:149:15:149:24 | (\\s\|[\\f])* | Strings with many repetitions of '\\t' can start matching anywhere after the start of the preceeding ((\\s\|[\\f])*)" |
447447
| tst.js:152:15:152:28 | (\\s\|[\\v]\|\\\\v)* | Strings with many repetitions of '\\t' can start matching anywhere after the start of the preceeding ((\\s\|[\\v]\|\\\\v)*)" |
448-
| tst.js:155:15:155:24 | (\\f\|[\\f])* | Strings with many repetitions of '\u000c' can start matching anywhere after the start of the preceeding ((\\f\|[\\f])*)" |
448+
| tst.js:155:15:155:24 | (\\f\|[\\f])* | Strings with many repetitions of '\\u000c' can start matching anywhere after the start of the preceeding ((\\f\|[\\f])*)" |
449449
| tst.js:158:15:158:22 | (\\W\|\\D)* | Strings with many repetitions of '/' can start matching anywhere after the start of the preceeding ((\\W\|\\D)*)" |
450450
| tst.js:161:15:161:22 | (\\S\|\\w)* | Strings with many repetitions of '!' can start matching anywhere after the start of the preceeding ((\\S\|\\w)*)" |
451451
| tst.js:164:15:164:24 | (\\S\|[\\w])* | Strings with many repetitions of '!' can start matching anywhere after the start of the preceeding ((\\S\|[\\w])*)" |

javascript/ql/test/query-tests/Security/CWE-400/ReDoS/ReDoS.expected

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@
123123
| tst.js:137:15:137:21 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
124124
| tst.js:143:15:143:22 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
125125
| tst.js:146:15:146:21 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
126-
| tst.js:149:15:149:24 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
127-
| tst.js:152:15:152:28 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
128-
| tst.js:155:15:155:24 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
126+
| tst.js:149:15:149:24 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
127+
| tst.js:152:15:152:28 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000b'. |
128+
| tst.js:155:15:155:24 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
129129
| tst.js:158:15:158:22 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '/'. |
130130
| tst.js:161:15:161:22 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
131131
| tst.js:164:15:164:24 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
@@ -199,3 +199,4 @@
199199
| tst.js:404:6:405:7 | (g\|gg)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'gg'. |
200200
| tst.js:407:125:407:127 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '0/*' and containing many repetitions of ' ;0'. |
201201
| tst.js:411:15:411:19 | a{1,} | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
202+
| tst.js:413:25:413:35 | (\\u0000\|.)+ | This part of the regular expression may cause exponential backtracking on strings starting with '\\n\\u0000' and containing many repetitions of '\\u0000'. |

javascript/ql/test/query-tests/Security/CWE-400/ReDoS/tst.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,4 +408,6 @@ var bad98 = /^(?:\*\/\*|[a-zA-Z0-9][a-zA-Z0-9!\#\$&\-\^_\.\+]{0,126}\/(?:\*|[a-z
408408

409409
var good48 = /(\/(?:\/[\w.-]*)*){0,1}:([\w.-]+)/;
410410

411-
var bad99 = /(a{1,})*b/;
411+
var bad99 = /(a{1,})*b/;
412+
413+
var unicode = /^\n\u0000(\u0000|.)+$/;

python/ql/test/query-tests/Security/CWE-730-ReDoS/ReDoS.expected

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,9 @@
3535
| redos.py:139:25:139:31 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
3636
| redos.py:145:25:145:32 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
3737
| redos.py:148:25:148:31 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
38-
| redos.py:151:25:151:34 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
39-
| redos.py:154:25:154:38 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
40-
| redos.py:157:25:157:34 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
38+
| redos.py:151:25:151:34 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
39+
| redos.py:154:25:154:38 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000b'. |
40+
| redos.py:157:25:157:34 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
4141
| redos.py:160:25:160:32 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
4242
| redos.py:163:25:163:32 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
4343
| redos.py:166:25:166:34 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
@@ -67,8 +67,8 @@
6767
| redos.py:259:24:259:126 | (.thisisagoddamnlongstringforstresstestingthequery\|\\sthisisagoddamnlongstringforstresstestingthequery)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\tthisisagoddamnlongstringforstresstestingthequery'. |
6868
| redos.py:262:24:262:87 | (thisisagoddamnlongstringforstresstestingthequery\|this\\w+query)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'thisisagoddamnlongstringforstresstestingthequery'. |
6969
| redos.py:262:78:262:80 | \\w+ | This part of the regular expression may cause exponential backtracking on strings starting with 'this' and containing many repetitions of '0querythis'. |
70-
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
71-
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\ufffd'. |
70+
| redos.py:268:28:268:39 | ([\ufffd\ufffd]\|[\ufffd\ufffd])* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\\ufffd'. |
71+
| redos.py:271:28:271:41 | ((\ufffd\|\ufffd)\|(\ufffd\|\ufffd))* | This part of the regular expression may cause exponential backtracking on strings starting with 'foo' and containing many repetitions of '\\ufffd'. |
7272
| redos.py:274:31:274:32 | b+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'b'. |
7373
| redos.py:277:48:277:50 | \\s* | This part of the regular expression may cause exponential backtracking on strings starting with '<0\\t0=' and containing many repetitions of '""\\t0='. |
7474
| redos.py:283:26:283:27 | a+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a'. |
@@ -103,5 +103,5 @@
103103
| redos.py:385:24:385:30 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
104104
| redos.py:386:26:386:32 | (\\d\|0)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
105105
| redos.py:391:15:391:25 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
106-
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
106+
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\\u00c6'. |
107107
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |

ruby/ql/test/query-tests/security/cwe-1333-exponential-redos/ReDoS.expected

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@
3333
| tst.rb:137:11:137:17 | (\\w\|G)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'G'. |
3434
| tst.rb:143:11:143:18 | (\\d\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
3535
| tst.rb:146:11:146:17 | (\\d\|5)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '5'. |
36-
| tst.rb:149:11:149:20 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
37-
| tst.rb:152:11:152:24 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000b'. |
38-
| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\u000c'. |
36+
| tst.rb:149:11:149:20 | (\\s\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
37+
| tst.rb:152:11:152:24 | (\\s\|[\\v]\|\\\\v)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000b'. |
38+
| tst.rb:155:11:155:20 | (\\f\|[\\f])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\u000c'. |
3939
| tst.rb:158:11:158:18 | (\\W\|\\D)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of ' '. |
4040
| tst.rb:161:11:161:18 | (\\S\|\\w)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |
4141
| tst.rb:164:11:164:20 | (\\S\|[\\w])* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '0'. |

shared/regex/codeql/regex/nfa/NfaUtils.qll

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,10 +1305,67 @@ module Make<RegexTreeViewSig TreeImpl> {
13051305
bindingset[s]
13061306
private string escape(string s) {
13071307
result =
1308-
s.replaceAll("\\", "\\\\")
1309-
.replaceAll("\n", "\\n")
1310-
.replaceAll("\r", "\\r")
1311-
.replaceAll("\t", "\\t")
1308+
escapeUnicodeString(s.replaceAll("\\", "\\\\")
1309+
.replaceAll("\n", "\\n")
1310+
.replaceAll("\r", "\\r")
1311+
.replaceAll("\t", "\\t"))
1312+
}
1313+
1314+
/**
1315+
* Gets a string where the unicode characters in `s` have been escaped.
1316+
*/
1317+
bindingset[s]
1318+
private string escapeUnicodeString(string s) {
1319+
result = concat(int i, string char | char = escapeUnicodeChar(s.charAt(i)) | char order by i)
1320+
}
1321+
1322+
/**
1323+
* Gets a unicode escaped string for `char`.
1324+
* If `char` is a printable char, then `char` is returned.
1325+
*/
1326+
bindingset[char]
1327+
private string escapeUnicodeChar(string char) {
1328+
if isPrintable(char)
1329+
then result = char
1330+
else result = "\\u" + to4digitNumber(toHex(any(int i | i.toUnicode() = char)))
1331+
}
1332+
1333+
/**
1334+
* Gets a string representation of `number` in hexadecimal.
1335+
* Works for the first 200000 numbers, which is enough for every unicode character.
1336+
*/
1337+
private string toHex(int number) {
1338+
number = [0 .. 200000] and
1339+
if number <= 9
1340+
then result = number + ""
1341+
else
1342+
if number <= 15
1343+
then result = "abcdef".charAt(number - 10)
1344+
else result = toHex(number / 16) + toHex(number % 16)
1345+
}
1346+
1347+
/** Gets a string where 0 has been prepended to `num` until it has length 4. */
1348+
bindingset[num]
1349+
private string to4digitNumber(string num) {
1350+
if num.length() >= 4
1351+
then result = num
1352+
else
1353+
if num.length() = 3
1354+
then result = "0" + num
1355+
else
1356+
if num.length() = 2
1357+
then result = "00" + num
1358+
else
1359+
if num.length() = 1
1360+
then result = "000" + num
1361+
else result = "0000"
1362+
}
1363+
1364+
/** Holds if `char` is easily printable char, or whitespace. */
1365+
private predicate isPrintable(string char) {
1366+
exists(ascii(char))
1367+
or
1368+
char = "\n\r\t".charAt(_)
13121369
}
13131370

13141371
/**

0 commit comments

Comments
 (0)