@@ -382,7 +382,7 @@ abstract class RegexString extends Expr {
382
382
not c = "[" and
383
383
not c = ")" and
384
384
not c = "|" and
385
- not this .qualifier ( start , _, _)
385
+ not this .qualifier ( start , _, _, _ )
386
386
)
387
387
}
388
388
@@ -688,41 +688,51 @@ abstract class RegexString extends Expr {
688
688
this .backreference ( start , end )
689
689
}
690
690
691
- private predicate qualifier ( int start , int end , boolean maybe_empty ) {
692
- this .short_qualifier ( start , end , maybe_empty ) and not this .getChar ( end ) = "?"
/**
 * Holds if a qualifier spans `start` to `end`: a short qualifier (see `short_qualifier`),
 * plus one directly following `?` when present. `maybe_empty` and `may_repeat_forever`
 * are those of the short qualifier.
 */
691
+ private predicate qualifier ( int start , int end , boolean maybe_empty , boolean may_repeat_forever ) {
692
+ this .short_qualifier ( start , end , maybe_empty , may_repeat_forever ) and
693
+ not this .getChar ( end ) = "?"
693
694
or
694
- exists ( int short_end | this .short_qualifier ( start , short_end , maybe_empty ) |
695
+ exists ( int short_end | this .short_qualifier ( start , short_end , maybe_empty , may_repeat_forever ) |
695
696
if this .getChar ( short_end ) = "?" then end = short_end + 1 else end = short_end
696
697
)
697
698
}
698
699
699
- private predicate short_qualifier ( int start , int end , boolean maybe_empty ) {
/**
 * Holds if a qualifier, not counting any trailing `?`, spans `start` to `end`: either one
 * of the single characters `+`, `*`, `?`, or a brace quantifier recognised by `multiples`.
 *
 * `maybe_empty` is true for `*`, `?`, and brace quantifiers whose lower bound is omitted
 * or zero. `may_repeat_forever` is true for `+`, `*`, and brace quantifiers whose upper
 * bound is omitted.
 */
700
+ private predicate short_qualifier (
701
+ int start , int end , boolean maybe_empty , boolean may_repeat_forever
702
+ ) {
700
703
(
701
- this .getChar ( start ) = "+" and maybe_empty = false
704
+ this .getChar ( start ) = "+" and maybe_empty = false and may_repeat_forever = true
702
705
or
703
- this .getChar ( start ) = "*" and maybe_empty = true
706
+ this .getChar ( start ) = "*" and maybe_empty = true and may_repeat_forever = true
704
707
or
705
- this .getChar ( start ) = "?" and maybe_empty = true
708
+ this .getChar ( start ) = "?" and maybe_empty = true and may_repeat_forever = false
706
709
) and
707
710
end = start + 1
708
711
or
709
- exists ( int endin | end = endin + 1 |
710
- this .getChar ( start ) = "{" and
711
- this .getChar ( endin ) = "}" and
712
- end > start and
713
- exists ( string multiples | multiples = this .getText ( ) .substring ( start + 1 , endin ) |
714
- multiples .regexpMatch ( "0+" ) and maybe_empty = true
715
- or
716
- multiples .regexpMatch ( "0*,[0-9]*" ) and maybe_empty = true
717
- or
718
- multiples .regexpMatch ( "0*[1-9][0-9]*" ) and maybe_empty = false
719
- or
720
- multiples .regexpMatch ( "0*[1-9][0-9]*,[0-9]*" ) and maybe_empty = false
721
- ) and
722
- not exists ( int mid |
723
- this .getChar ( mid ) = "}" and
724
- mid > start and
725
- mid < endin
712
+ exists ( string lower , string upper |
713
+ this .multiples ( start , end , lower , upper ) and
714
+ ( if lower = "" or lower .toInt ( ) = 0 then maybe_empty = true else maybe_empty = false ) and
715
+ if upper = "" then may_repeat_forever = true else may_repeat_forever = false
716
+ )
717
+ }
718
+
719
+ /**
720
+ * Holds if a brace quantifier, `{n}` or `{lower,upper}`, spans `start` (the `{`) to `end`
721
+ * (one past the `}`), with the given bounds. For `{n}` both bounds are `n`; a bound omitted
722
+ * from the comma form is the empty string.
723
+ */
724
+ predicate multiples ( int start , int end , string lower , string upper ) {
725
+ this .getChar ( start ) = "{" and
726
+ this .getChar ( end - 1 ) = "}" and
727
+ exists ( string inner | inner = this .getText ( ) .substring ( start + 1 , end - 1 ) |
728
+ inner .regexpMatch ( "[0-9]+" ) and
729
+ lower = inner and
730
+ upper = lower
731
+ or
732
+ inner .regexpMatch ( "[0-9]*,[0-9]*" ) and
// The pattern above admits exactly one comma, so `commaIndex` is unique.
733
+ exists ( int commaIndex | commaIndex = inner .indexOf ( "," ) |
734
+ lower = inner .prefix ( commaIndex ) and
735
+ upper = inner .suffix ( commaIndex + 1 )
726
736
)
727
737
)
728
738
}
@@ -731,19 +741,29 @@ abstract class RegexString extends Expr {
731
741
* Whether the text in the range start,end is a qualified item, where item is a character,
732
742
* a character set or a group.
733
743
*/
734
- predicate qualifiedItem ( int start , int end , boolean maybe_empty ) {
735
- this .qualifiedPart ( start , _, end , maybe_empty )
744
+ predicate qualifiedItem ( int start , int end , boolean maybe_empty , boolean may_repeat_forever ) {
// Both flags come straight from `qualifiedPart`; where the unqualified part ends is ignored.
745
+ this .qualifiedPart ( start , _, end , maybe_empty , may_repeat_forever )
736
746
}
737
747
738
- private predicate qualifiedPart ( int start , int part_end , int end , boolean maybe_empty ) {
748
+ /**
749
+ * Holds if a base item is found between `start` and `part_end` and a qualifier applying
750
+ * to it is found between `part_end` and `end`.
751
+ *
752
+ * `maybe_empty` is true if the qualifier allows zero repetitions of the part.
753
+ * `may_repeat_forever` is true if the qualifier places no upper bound on repetitions.
754
+ */
755
+ predicate qualifiedPart (
756
+ int start , int part_end , int end , boolean maybe_empty , boolean may_repeat_forever
757
+ ) {
739
758
this .baseItem ( start , part_end ) and
740
- this .qualifier ( part_end , end , maybe_empty )
759
+ this .qualifier ( part_end , end , maybe_empty , may_repeat_forever )
741
760
}
742
761
743
- private predicate item ( int start , int end ) {
744
- this .qualifiedItem ( start , end , _)
762
+ /** Holds if the range `start`, `end` is a qualified item, or a base item that is not followed by a qualifier. */
763
+ predicate item ( int start , int end ) {
764
+ this .qualifiedItem ( start , end , _, _)
745
765
or
746
- this .baseItem ( start , end ) and not this .qualifier ( end , _, _)
766
+ this .baseItem ( start , end ) and not this .qualifier ( end , _, _, _ )
747
767
}
748
768
749
769
private predicate subsequence ( int start , int end ) {
@@ -766,7 +786,7 @@ abstract class RegexString extends Expr {
766
786
*/
767
787
predicate sequence ( int start , int end ) {
768
788
this .sequenceOrQualified ( start , end ) and
// Exclude ranges that are themselves a qualified item.
769
- not this .qualifiedItem ( start , end , _)
789
+ not this .qualifiedItem ( start , end , _, _ )
770
790
}
771
791
772
792
private predicate sequenceOrQualified ( int start , int end ) {
@@ -777,7 +797,8 @@ abstract class RegexString extends Expr {
777
797
private predicate item_start ( int start ) {
778
798
this .character ( start , _) or
779
799
this .isGroupStart ( start ) or
780
- this .charSet ( start , _)
800
+ this .charSet ( start , _) or
// A backreference beginning at `start` also counts as the start of an item.
801
+ this .backreference ( start , _)
781
802
}
782
803
783
804
private predicate item_end ( int end ) {
@@ -787,7 +808,7 @@ abstract class RegexString extends Expr {
787
808
or
788
809
this .charSet ( _, end )
789
810
or
790
- this .qualifier ( _, end , _)
811
+ this .qualifier ( _, end , _, _ )
791
812
}
792
813
793
814
private predicate top_level ( int start , int end ) {
@@ -839,14 +860,14 @@ abstract class RegexString extends Expr {
839
860
or
840
861
exists ( int x | this .firstPart ( x , end ) |
841
862
this .emptyMatchAtStartGroup ( x , start ) or
842
- this .qualifiedItem ( x , start , true ) or
863
+ this .qualifiedItem ( x , start , true , _ ) or
843
864
this .specialCharacter ( x , start , "^" )
844
865
)
845
866
or
846
867
exists ( int y | this .firstPart ( start , y ) |
847
868
this .item ( start , end )
848
869
or
849
- this .qualifiedPart ( start , end , y , _)
870
+ this .qualifiedPart ( start , end , y , _, _ )
850
871
)
851
872
or
852
873
exists ( int x , int y | this .firstPart ( x , y ) |
@@ -863,7 +884,7 @@ abstract class RegexString extends Expr {
863
884
exists ( int y | this .lastPart ( start , y ) |
864
885
this .emptyMatchAtEndGroup ( end , y )
865
886
or
866
- this .qualifiedItem ( end , y , true )
887
+ this .qualifiedItem ( end , y , true , _ )
867
888
or
868
889
this .specialCharacter ( end , y , "$" )
869
890
or
@@ -875,7 +896,7 @@ abstract class RegexString extends Expr {
875
896
this .item ( start , end )
876
897
)
877
898
or
878
- exists ( int y | this .lastPart ( start , y ) | this .qualifiedPart ( start , end , y , _) )
899
+ exists ( int y | this .lastPart ( start , y ) | this .qualifiedPart ( start , end , y , _, _ ) )
879
900
or
880
901
exists ( int x , int y | this .lastPart ( x , y ) |
881
902
this .groupContents ( x , y , start , end )
@@ -892,7 +913,7 @@ abstract class RegexString extends Expr {
892
913
(
893
914
this .character ( start , end )
894
915
or
895
- this .qualifiedItem ( start , end , _)
916
+ this .qualifiedItem ( start , end , _, _ )
896
917
or
897
918
this .charSet ( start , end )
898
919
) and
@@ -907,7 +928,7 @@ abstract class RegexString extends Expr {
907
928
(
908
929
this .character ( start , end )
909
930
or
910
- this .qualifiedItem ( start , end , _)
931
+ this .qualifiedItem ( start , end , _, _ )
911
932
or
912
933
this .charSet ( start , end )
913
934
) and
0 commit comments