Skip to content

Commit a9d40d3

Browse files
authored
Merge pull request github#12550 from erik-krogh/useNumberUtil
Java/Python: use Number.qll to parse hex numbers in regex parsing
2 parents 0f813ce + ef49802 commit a9d40d3

File tree

4 files changed

+7
-72
lines changed

4 files changed

+7
-72
lines changed

java/ql/lib/semmle/code/java/regex/RegexTreeView.qll

Lines changed: 3 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,8 @@ module Impl implements RegexTreeViewSig {
470470
override string getPrimaryQLClass() { result = "RegExpAlt" }
471471
}
472472

473+
private import codeql.util.Numbers as Numbers
474+
473475
/**
474476
* An escaped regular expression term, that is, a regular expression
475477
* term starting with a backslash, which is not a backreference.
@@ -531,11 +533,7 @@ module Impl implements RegexTreeViewSig {
531533
* Gets the unicode char for this escape.
532534
* E.g. for `\u0061` this returns "a".
533535
*/
534-
private string getUnicode() {
535-
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
536-
result = codepoint.toUnicode()
537-
)
538-
}
536+
private string getUnicode() { result = Numbers::parseHexInt(this.getHexString()).toUnicode() }
539537

540538
/** Gets the part of this escape that is a hexadecimal string */
541539
private string getHexString() {
@@ -547,18 +545,6 @@ module Impl implements RegexTreeViewSig {
547545
then result = this.getText().substring(3, this.getText().length() - 1)
548546
else result = this.getText().suffix(2) // \xhh
549547
}
550-
551-
/**
552-
* Gets int value for the `index`th char in the hex number of the unicode escape.
553-
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
554-
*/
555-
private int getHexValueFromUnicode(int index) {
556-
this.isUnicode() and
557-
exists(string hex, string char | hex = this.getHexString() |
558-
char = hex.charAt(index) and
559-
result = 16.pow(hex.length() - index - 1) * toHex(char)
560-
)
561-
}
562548
}
563549

564550
/**
@@ -586,25 +572,6 @@ module Impl implements RegexTreeViewSig {
586572
RegExpNonWordBoundary() { this.getChar() = "\\B" }
587573
}
588574

589-
/**
590-
* Gets the hex number for the `hex` char.
591-
*/
592-
private int toHex(string hex) {
593-
result = [0 .. 9] and hex = result.toString()
594-
or
595-
result = 10 and hex = ["a", "A"]
596-
or
597-
result = 11 and hex = ["b", "B"]
598-
or
599-
result = 12 and hex = ["c", "C"]
600-
or
601-
result = 13 and hex = ["d", "D"]
602-
or
603-
result = 14 and hex = ["e", "E"]
604-
or
605-
result = 15 and hex = ["f", "F"]
606-
}
607-
608575
/**
609576
* A character class escape in a regular expression.
610577
* That is, an escaped character that denotes multiple characters.

python/ql/lib/qlpack.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ library: true
77
upgrades: upgrades
88
dependencies:
99
codeql/regex: ${workspace}
10+
codeql/util: ${workspace}
1011
codeql/tutorial: ${workspace}
1112
dataExtensions:
1213
- semmle/python/frameworks/**/model.yml

python/ql/lib/semmle/python/RegexTreeView.qll

Lines changed: 3 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,8 @@ module Impl implements RegexTreeViewSig {
468468
*/
469469
class RegExpCharEscape = RegExpEscape;
470470

471+
private import codeql.util.Numbers as Numbers
472+
471473
/**
472474
* An escaped regular expression term, that is, a regular expression
473475
* term starting with a backslash, which is not a backreference.
@@ -528,42 +530,8 @@ module Impl implements RegexTreeViewSig {
528530
* E.g. for `\u0061` this returns "a".
529531
*/
530532
private string getUnicode() {
531-
exists(int codepoint | codepoint = sum(this.getHexValueFromUnicode(_)) |
532-
result = codepoint.toUnicode()
533-
)
533+
result = Numbers::parseHexInt(this.getText().suffix(2)).toUnicode()
534534
}
535-
536-
/**
537-
* Gets int value for the `index`th char in the hex number of the unicode escape.
538-
* E.g. for `\u0061` and `index = 2` this returns 96 (the number `6` interpreted as hex).
539-
*/
540-
private int getHexValueFromUnicode(int index) {
541-
this.isUnicode() and
542-
exists(string hex, string char | hex = this.getText().suffix(2) |
543-
char = hex.charAt(index) and
544-
result = 16.pow(hex.length() - index - 1) * toHex(char)
545-
)
546-
}
547-
}
548-
549-
/**
550-
* Gets the hex number for the `hex` char.
551-
*/
552-
private int toHex(string hex) {
553-
hex = [0 .. 9].toString() and
554-
result = hex.toInt()
555-
or
556-
result = 10 and hex = ["a", "A"]
557-
or
558-
result = 11 and hex = ["b", "B"]
559-
or
560-
result = 12 and hex = ["c", "C"]
561-
or
562-
result = 13 and hex = ["d", "D"]
563-
or
564-
result = 14 and hex = ["e", "E"]
565-
or
566-
result = 15 and hex = ["f", "F"]
567535
}
568536

569537
/**

python/ql/src/qlpack.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ groups:
66
dependencies:
77
codeql/python-all: ${workspace}
88
codeql/suite-helpers: ${workspace}
9-
codeql/util: ${workspace}
109
suites: codeql-suites
1110
extractor: python
1211
defaultSuiteFile: codeql-suites/python-code-scanning.qls

0 commit comments

Comments
 (0)