Skip to content

Commit f5daee2

Browse files
committed
port canonicalization fix from github#11071 to the shared pack
1 parent 09275a5 commit f5daee2

File tree

1 file changed

+23
-19
lines changed

1 file changed

+23
-19
lines changed

shared/regex/codeql/regex/nfa/NfaUtils.qll

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -164,19 +164,20 @@ module Make<RegexTreeViewSig TreeImpl> {
164164
private predicate isCanonicalTerm(RelevantRegExpTerm term, string str) {
165165
term =
166166
min(RelevantRegExpTerm t |
167-
str = t.getRawValue() + "|" + getCanonicalizationFlags(t.getRootTerm())
167+
str = getCanonicalizationString(t)
168168
|
169169
t order by getTermLocationString(t)
170170
)
171171
}
172172

173173
/**
174-
* Gets a string representation of the flags used with the regular expression.
175-
* Only the flags that are relevant for the canonicalization are included.
174+
* Gets a string representation of `term` that is used for canonicalization.
176175
*/
177-
string getCanonicalizationFlags(RegExpTerm root) {
178-
root.isRootTerm() and
179-
(if isIgnoreCase(root) then result = "i" else result = "")
176+
private string getCanonicalizationString(RelevantRegExpTerm term) {
177+
exists(string ignoreCase |
178+
(if isIgnoreCase(term.getRootTerm()) then ignoreCase = "i" else ignoreCase = "") and
179+
result = term.getRawValue() + "|" + ignoreCase
180+
)
180181
}
181182

182183
/**
@@ -221,12 +222,19 @@ module Make<RegexTreeViewSig TreeImpl> {
221222
Epsilon()
222223

223224
/**
224-
* Gets the canonical CharClass for `term`.
225+
* Gets the CharClass corresponding to the canonical representative `term`.
225226
*/
226-
CharClass getCanonicalCharClass(RegExpTerm term) {
227+
private CharClass getCharClassForCanonicalTerm(RegExpTerm term) {
227228
exists(string str | isCanonicalTerm(term, str) | result = CharClass(str))
228229
}
229230

231+
/**
232+
* Gets a char class that represents `term`, even when `term` is not the canonical representative.
233+
*/
234+
CharacterClass getCanonicalCharClass(RegExpTerm term) {
235+
exists(string str | str = getCanonicalizationString(term) and result = CharClass(str))
236+
}
237+
230238
/**
231239
* Holds if `a` and `b` are input symbols from the same regexp.
232240
*/
@@ -319,7 +327,7 @@ module Make<RegexTreeViewSig TreeImpl> {
319327
*/
320328
pragma[noinline]
321329
predicate hasChildThatMatchesIgnoringCasingFlags(RegExpCharacterClass cc, string char) {
322-
exists(getCanonicalCharClass(cc)) and
330+
exists(getCharClassForCanonicalTerm(cc)) and
323331
exists(RegExpTerm child | child = cc.getAChild() |
324332
char = child.(RegexpCharacterConstant).getValue()
325333
or
@@ -420,7 +428,7 @@ module Make<RegexTreeViewSig TreeImpl> {
420428
private class PositiveCharacterClass extends CharacterClass {
421429
RegExpCharacterClass cc;
422430

423-
PositiveCharacterClass() { this = getCanonicalCharClass(cc) and not cc.isInverted() }
431+
PositiveCharacterClass() { this = getCharClassForCanonicalTerm(cc) and not cc.isInverted() }
424432

425433
override string getARelevantChar() { result = caseNormalize(getAMentionedChar(cc), cc) }
426434

@@ -433,7 +441,7 @@ module Make<RegexTreeViewSig TreeImpl> {
433441
private class InvertedCharacterClass extends CharacterClass {
434442
RegExpCharacterClass cc;
435443

436-
InvertedCharacterClass() { this = getCanonicalCharClass(cc) and cc.isInverted() }
444+
InvertedCharacterClass() { this = getCharClassForCanonicalTerm(cc) and cc.isInverted() }
437445

438446
override string getARelevantChar() {
439447
result = nextChar(caseNormalize(getAMentionedChar(cc), cc)) or
@@ -468,7 +476,7 @@ module Make<RegexTreeViewSig TreeImpl> {
468476

469477
PositiveCharacterClassEscape() {
470478
isEscapeClass(cc, charClass) and
471-
this = getCanonicalCharClass(cc) and
479+
this = getCharClassForCanonicalTerm(cc) and
472480
charClass = ["d", "s", "w"]
473481
}
474482

@@ -508,7 +516,7 @@ module Make<RegexTreeViewSig TreeImpl> {
508516
NegativeCharacterClassEscape() {
509517
exists(RegExpTerm cc |
510518
isEscapeClass(cc, charClass) and
511-
this = getCanonicalCharClass(cc) and
519+
this = getCharClassForCanonicalTerm(cc) and
512520
charClass = ["D", "S", "W"]
513521
)
514522
}
@@ -703,17 +711,13 @@ module Make<RegexTreeViewSig TreeImpl> {
703711
cc.isUniversalClass() and q1 = before(cc) and lbl = Any() and q2 = after(cc)
704712
or
705713
q1 = before(cc) and
706-
lbl =
707-
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
708-
getCanonicalizationFlags(cc.getRootTerm()))) and
714+
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
709715
q2 = after(cc)
710716
)
711717
or
712718
exists(RegExpTerm cc | isEscapeClass(cc, _) |
713719
q1 = before(cc) and
714-
lbl =
715-
CharacterClasses::normalize(CharClass(cc.getRawValue() + "|" +
716-
getCanonicalizationFlags(cc.getRootTerm()))) and
720+
lbl = CharacterClasses::normalize(CharClass(getCanonicalizationString(cc))) and
717721
q2 = after(cc)
718722
)
719723
or

0 commit comments

Comments
 (0)