Skip to content

Commit 34d7772

Browse files
committed
Python: Move constraints into branch charpreds
For sequences and alternations, we require more than one child. Otherwise, we wish to represent the term differently (as its single element). This avoids multiple representations of the same term.
1 parent c455483 commit 34d7772

File tree

1 file changed

+18
-13
lines changed

1 file changed

+18
-13
lines changed

python/ql/src/semmle/python/RegexTreeView.qll

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,29 @@ private import semmle.python.regex
77
* An element containing a regular expression term, that is, either
88
* a string literal (parsed as a regular expression)
99
* or another regular expression term.
10+
*
11+
* For sequences and alternations, we require more than one child.
12+
* Otherwise, we wish to represent the term differently.
13+
* This avoids multiple representations of the same term.
1014
*/
1115
newtype TRegExpParent =
1216
/** A string literal used as a regular expression */
1317
TRegExpLiteral(Regex re) or
1418
/** A quantified term */
1519
TRegExpQuantifier(Regex re, int start, int end) { re.qualifiedItem(start, end, _, _) } or
1620
/** A sequence term */
17-
TRegExpSequence(Regex re, int start, int end) { re.sequence(start, end) } or
21+
TRegExpSequence(Regex re, int start, int end) {
22+
re.sequence(start, end) and
23+
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
24+
} or
1825
/** An alternation term */
19-
TRegExpAlt(Regex re, int start, int end) { re.alternation(start, end) } or
26+
TRegExpAlt(Regex re, int start, int end) {
27+
re.alternation(start, end) and
28+
exists(int part_end |
29+
re.alternationOption(start, end, start, part_end) and
30+
part_end < end
31+
) // if an alternation does not have more than one element, it should be treated as that element instead.
32+
} or
2033
/** A character class term */
2134
TRegExpCharacterClass(Regex re, int start, int end) { re.charSet(start, end) } or
2235
/** A character range term */
@@ -75,11 +88,7 @@ class RegExpTerm extends RegExpParent {
7588
int end;
7689

7790
RegExpTerm() {
78-
this = TRegExpAlt(re, start, end) and
79-
exists(int part_end |
80-
re.alternationOption(start, end, start, part_end) and
81-
part_end < end
82-
) // if an alternation does not have more than one element, it should be treated as that element instead.
91+
this = TRegExpAlt(re, start, end)
8392
or
8493
this = TRegExpBackRef(re, start, end)
8594
or
@@ -93,8 +102,7 @@ class RegExpTerm extends RegExpParent {
93102
or
94103
this = TRegExpQuantifier(re, start, end)
95104
or
96-
this = TRegExpSequence(re, start, end) and
97-
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
105+
this = TRegExpSequence(re, start, end)
98106
or
99107
this = TRegExpSpecialChar(re, start, end)
100108
}
@@ -341,10 +349,7 @@ class RegExpRange extends RegExpQuantifier {
341349
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
342350
*/
343351
class RegExpSequence extends RegExpTerm, TRegExpSequence {
344-
RegExpSequence() {
345-
this = TRegExpSequence(re, start, end) and
346-
exists(seqChild(re, start, end, 1)) // if a sequence does not have more than one element, it should be treated as that element instead.
347-
}
352+
RegExpSequence() { this = TRegExpSequence(re, start, end) }
348353

349354
override RegExpTerm getChild(int i) { result = seqChild(re, start, end, i) }
350355

0 commit comments

Comments
 (0)