@@ -7,16 +7,29 @@ private import semmle.python.regex
7
7
* An element containing a regular expression term, that is, either
8
8
* a string literal (parsed as a regular expression)
9
9
* or another regular expression term.
10
+ *
11
+ * For sequences and alternations, we require more than one child.
12
+ * Otherwise, we wish to represent the term differently.
13
+ * This avoids multiple representations of the same term.
10
14
*/
11
15
newtype TRegExpParent =
12
16
/** A string literal used as a regular expression */
13
17
TRegExpLiteral ( Regex re ) or
14
18
/** A quantified term */
15
19
TRegExpQuantifier ( Regex re , int start , int end ) { re .qualifiedItem ( start , end , _, _) } or
16
20
/** A sequence term */
17
- TRegExpSequence ( Regex re , int start , int end ) { re .sequence ( start , end ) } or
21
+ TRegExpSequence ( Regex re , int start , int end ) {
22
+ re .sequence ( start , end ) and
23
+ exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
24
+ } or
18
25
/** An alternation term */
19
- TRegExpAlt ( Regex re , int start , int end ) { re .alternation ( start , end ) } or
26
+ TRegExpAlt ( Regex re , int start , int end ) {
27
+ re .alternation ( start , end ) and
28
+ exists ( int part_end |
29
+ re .alternationOption ( start , end , start , part_end ) and
30
+ part_end < end
31
+ ) // if an alternation does not have more than one element, it should be treated as that element instead.
32
+ } or
20
33
/** A character class term */
21
34
TRegExpCharacterClass ( Regex re , int start , int end ) { re .charSet ( start , end ) } or
22
35
/** A character range term */
@@ -75,11 +88,7 @@ class RegExpTerm extends RegExpParent {
75
88
int end ;
76
89
77
90
RegExpTerm ( ) {
78
- this = TRegExpAlt ( re , start , end ) and
79
- exists ( int part_end |
80
- re .alternationOption ( start , end , start , part_end ) and
81
- part_end < end
82
- ) // if an alternation does not have more than one element, it should be treated as that element instead.
91
+ this = TRegExpAlt ( re , start , end )
83
92
or
84
93
this = TRegExpBackRef ( re , start , end )
85
94
or
@@ -93,8 +102,7 @@ class RegExpTerm extends RegExpParent {
93
102
or
94
103
this = TRegExpQuantifier ( re , start , end )
95
104
or
96
- this = TRegExpSequence ( re , start , end ) and
97
- exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
105
+ this = TRegExpSequence ( re , start , end )
98
106
or
99
107
this = TRegExpSpecialChar ( re , start , end )
100
108
}
@@ -341,10 +349,7 @@ class RegExpRange extends RegExpQuantifier {
341
349
* This is a sequence with the elements `(ECMA|Java)` and `Script`.
342
350
*/
343
351
class RegExpSequence extends RegExpTerm , TRegExpSequence {
344
- RegExpSequence ( ) {
345
- this = TRegExpSequence ( re , start , end ) and
346
- exists ( seqChild ( re , start , end , 1 ) ) // if a sequence does not have more than one element, it should be treated as that element instead.
347
- }
352
+ RegExpSequence ( ) { this = TRegExpSequence ( re , start , end ) }
348
353
349
354
override RegExpTerm getChild ( int i ) { result = seqChild ( re , start , end , i ) }
350
355
0 commit comments