Skip to content

Commit 440e4b9

Browse files
committed
enable unicode support in the Python ReDoS query
1 parent 1d56748 commit 440e4b9

File tree

3 files changed

+44
-39
lines changed

3 files changed

+44
-39
lines changed

python/ql/src/semmle/python/RegexTreeView.qll

Lines changed: 37 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -473,46 +473,44 @@ class RegExpEscape extends RegExpNormalChar {
473473
* E.g. for `\u0061` this returns "a".
474474
*/
475475
private string getUnicode() {
476-
// TODO: Enable this once a supporting CLI is released.
477-
// exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) |
478-
// result = codepoint.toUnicode()
479-
// )
480-
none()
476+
exists(int codepoint | codepoint = sum(getHexValueFromUnicode(_)) |
477+
result = codepoint.toUnicode()
478+
)
479+
}
480+
481+
/**
482+
* Gets the place-weighted int value of the `index`th char in the hex number of the unicode escape.
483+
* E.g. for `\u0061` and `index = 2` this returns 96 (the hex digit `6` in the sixteens place, i.e. `6 * 16`).
484+
*/
485+
private int getHexValueFromUnicode(int index) {
486+
isUnicode() and
487+
exists(string hex, string char | hex = getText().suffix(2) |
488+
char = hex.charAt(index) and
489+
result = 16.pow(hex.length() - index - 1) * toHex(char)
490+
)
481491
}
482-
// TODO: Enable this once a supporting CLI is released.
483-
// /**
484-
// * Gets int value for the `index`th char in the hex number of the unicode escape.
485-
// * E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
486-
// */
487-
// private int getHexValueFromUnicode(int index) {
488-
// isUnicode() and
489-
// exists(string hex, string char | hex = getText().suffix(2) |
490-
// char = hex.charAt(index) and
491-
// result = 16.pow(hex.length() - index - 1) * toHex(char)
492-
// )
493-
// }
494-
}
495-
496-
// TODO: Enable this once a supporting CLI is released.
497-
// /**
498-
// * Gets the hex number for the `hex` char.
499-
// */
500-
// private int toHex(string hex) {
501-
// hex = [0 .. 9].toString() and
502-
// result = hex.toInt()
503-
// or
504-
// result = 10 and hex = ["a", "A"]
505-
// or
506-
// result = 11 and hex = ["b", "B"]
507-
// or
508-
// result = 12 and hex = ["c", "C"]
509-
// or
510-
// result = 13 and hex = ["d", "D"]
511-
// or
512-
// result = 14 and hex = ["e", "E"]
513-
// or
514-
// result = 15 and hex = ["f", "F"]
515-
// }
492+
}
493+
494+
/**
495+
* Gets the integer value (0-15) of the single hex digit `hex`, accepting both lower- and upper-case letters.
496+
*/
497+
private int toHex(string hex) {
498+
hex = [0 .. 9].toString() and
499+
result = hex.toInt()
500+
or
501+
result = 10 and hex = ["a", "A"]
502+
or
503+
result = 11 and hex = ["b", "B"]
504+
or
505+
result = 12 and hex = ["c", "C"]
506+
or
507+
result = 13 and hex = ["d", "D"]
508+
or
509+
result = 14 and hex = ["e", "E"]
510+
or
511+
result = 15 and hex = ["f", "F"]
512+
}
513+
516514
/**
517515
* A character class escape in a regular expression.
518516
* That is, an escaped character that denotes multiple characters.

python/ql/test/query-tests/Security/CWE-730/ReDoS.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,5 +92,6 @@
9292
| redos.py:363:25:363:43 | ((?:a{0\|-)\|\\w\\{\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0'. |
9393
| redos.py:364:25:364:45 | ((?:a{0,\|-)\|\\w\\{\\d,)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,'. |
9494
| redos.py:365:25:365:48 | ((?:a{0,2\|-)\|\\w\\{\\d,\\d)+ | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of 'a{0,2'. |
95+
| redos.py:371:25:371:35 | (\\u0061\|a)* | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of 'a'. |
9596
| unittests.py:5:17:5:23 | (\u00c6\|\\\u00c6)+ | This part of the regular expression may cause exponential backtracking on strings starting with 'X' and containing many repetitions of '\u00c6'. |
9697
| unittests.py:9:16:9:24 | (?:.\|\\n)* | This part of the regular expression may cause exponential backtracking on strings containing many repetitions of '\\n'. |

python/ql/test/query-tests/Security/CWE-730/redos.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,3 +366,9 @@
366366

367367
# GOOD:
368368
good42 = re.compile(r'''^((?:a{0,2}|-)|\w\{\d,\d\})+X$''')
369+
370+
# NOT GOOD
371+
bad87 = re.compile(r'X(\u0061|a)*Y')
372+
373+
# GOOD
374+
good43 = re.compile(r'X(\u0061|b)+Y')

0 commit comments

Comments
 (0)