@@ -72,6 +72,49 @@ private int ascii(string char) {
72
72
)
73
73
}
74
74
75
/**
 * Holds if the term `t` is able to match the empty string (epsilon).
 *
 * Such a term places no restriction on the language accepted by the
 * regular expression that encloses it.
 *
 * The result is an under-approximation: a term that is not covered by one
 * of the cases below is never reported, and in particular this predicate
 * does not hold for sub-patterns.
 */
predicate matchesEpsilon(RegExpTerm t) {
  // A star or optional term, or a range term whose lower bound is 0.
  t instanceof RegExpStar
  or
  t instanceof RegExpOpt
  or
  exists(RegExpRange range | range = t | range.getLowerBound() = 0)
  or
  // For alternations, groups, plus terms and range terms it is enough
  // that some child matches epsilon.
  (
    t instanceof RegExpAlt or
    t instanceof RegExpGroup or
    t instanceof RegExpPlus or
    t instanceof RegExpRange
  ) and
  matchesEpsilon(t.getAChild())
  or
  // A back-reference matches epsilon when the group it refers to does.
  exists(RegExpTerm referenced |
    referenced = t.(RegExpBackRef).getGroup() and
    matchesEpsilon(referenced)
  )
  or
  // A sequence must have at least one child, and every child must match epsilon.
  forex(RegExpTerm element | element = t.(RegExpSequence).getAChild() | matchesEpsilon(element))
}
103
+
104
/**
 * A positive lookahead or positive lookbehind whose operand satisfies
 * `matchesEpsilon`, that is, an assertion that can match the empty string.
 *
 * NOTE: the class name is spelled with three `t`s; the spelling is kept
 * as-is because other code refers to the class by this exact name.
 */
class EmptyPositiveSubPatttern extends RegExpSubPattern {
  EmptyPositiveSubPatttern() {
    // The asserted operand must itself be able to match epsilon ...
    exists(RegExpTerm operand | operand = this.getOperand() | matchesEpsilon(operand)) and
    // ... and only the positive assertion kinds are included.
    (
      this instanceof RegExpPositiveLookahead
      or
      this instanceof RegExpPositiveLookbehind
    )
  }
}
117
+
75
118
/**
76
119
* A branch in a disjunction that is the root node in a literal, or a literal
77
120
* whose root node is not a disjunction.
@@ -659,6 +702,10 @@ predicate delta(State q1, EdgeLabel lbl, State q2) {
659
702
exists ( RegExpDollar dollar | q1 = before ( dollar ) |
660
703
lbl = Epsilon ( ) and q2 = Accept ( getRoot ( dollar ) )
661
704
)
705
+ or
706
+ exists ( EmptyPositiveSubPatttern empty | q1 = before ( empty ) |
707
+ lbl = Epsilon ( ) and q2 = after ( empty )
708
+ )
662
709
}
663
710
664
711
/**
0 commit comments