@@ -42,27 +42,16 @@ predicate isAlphanumeric(string char) {
42
42
predicate overlap ( RegExpCharacterRange a , RegExpCharacterRange b ) {
43
43
exists ( RegExpCharacterClass clz |
44
44
a = clz .getAChild ( ) and
45
- b = clz .getAChild ( )
45
+ b = clz .getAChild ( ) and
46
+ a != b
46
47
|
47
- // b contains the lower end of a
48
48
exists ( int alow , int ahigh , int blow , int bhigh |
49
49
isRange ( a , alow , ahigh ) and
50
50
isRange ( b , blow , bhigh ) and
51
- blow <= alow and
52
- bhigh >= ahigh
53
- )
54
- or
55
- // b contains the upper end of a
56
- exists ( int blow , int bhigh , int alow , int ahigh |
57
- isRange ( a , alow , ahigh ) and
58
- isRange ( b , blow , bhigh ) and
59
- blow <= ahigh and
60
- bhigh >= ahigh
51
+ alow <= bhigh and
52
+ blow <= ahigh
61
53
)
62
54
)
63
- or
64
- // symmetric overlap
65
- overlap ( b , a )
66
55
}
67
56
68
57
/**
@@ -106,8 +95,8 @@ class OverlyWideRange extends RegExpCharacterRange {
106
95
toCodePoint ( "9" ) >= low and
107
96
toCodePoint ( "A" ) <= high
108
97
or
109
- // any non-alpha numeric as part of the range
110
- not isAlphanumeric ( [ low , high ] .toUnicode ( ) )
98
+ // a non-alphanumeric char as part of the range boundaries
99
+ exists ( int bound | bound = [ low , high ] | not isAlphanumeric ( bound .toUnicode ( ) ) )
111
100
) and
112
101
// allowlist for known ranges
113
102
not this = allowedWideRanges ( )
@@ -141,6 +130,7 @@ private string getInRange(string low, string high) {
141
130
/** A module computing an equivalent character class for an overly wide range. */
142
131
module RangePrinter {
143
132
bindingset [ char]
133
+ bindingset [ result ]
144
134
private string next ( string char ) {
145
135
exists ( int prev , int next |
146
136
prev .toUnicode ( ) = char and
@@ -149,15 +139,6 @@ module RangePrinter {
149
139
)
150
140
}
151
141
152
- bindingset [ char]
153
- private string prev ( string char ) {
154
- exists ( int prev , int next |
155
- prev .toUnicode ( ) = char and
156
- next .toUnicode ( ) = result and
157
- next = prev - 1
158
- )
159
- }
160
-
161
142
/** Gets the points where the parts of the pretty printed range should be cut off. */
162
143
private string cutoffs ( ) { result = [ "A" , "Z" , "a" , "z" , "0" , "9" ] }
163
144
@@ -176,7 +157,7 @@ module RangePrinter {
176
157
result = cut
177
158
or
178
159
cut = [ "A" , "a" , "0" ] and
179
- result = prev ( cut )
160
+ next ( result ) = cut
180
161
}
181
162
182
163
/** Gets the cutoff char used for a given `part` of a range when pretty-printing it. */
@@ -209,25 +190,21 @@ module RangePrinter {
209
190
or
210
191
// middle
211
192
part >= 1 and
212
- part < parts ( range ) and
193
+ part < parts ( range ) - 1 and
213
194
low = lowCut ( cutoff ( range , part - 1 ) ) and
214
195
high = highCut ( cutoff ( range , part ) )
215
196
or
216
197
// last.
217
- part = parts ( range ) and
218
- low = lowCut ( cutoff ( range , part - 2 ) ) and
198
+ part = parts ( range ) - 1 and
199
+ low = lowCut ( cutoff ( range , part - 1 ) ) and
219
200
range .isRange ( _, high )
220
201
}
221
202
222
203
/** Gets an escaped `char` for use in a character class. */
223
204
bindingset [ char]
224
205
private string escape ( string char ) {
225
206
exists ( string reg | reg = "(\\[|\\]|\\\\|-|/)" |
226
- char .regexpMatch ( reg ) and
227
- result = "\\" + char
228
- or
229
- not char .regexpMatch ( reg ) and
230
- result = char
207
+ if char .regexpMatch ( reg ) then result = "\\" + char else result = char
231
208
)
232
209
}
233
210
@@ -247,16 +224,19 @@ module RangePrinter {
247
224
/** Gets the entire pretty printed equivalent range. */
248
225
string printEquivalentCharClass ( OverlyWideRange range ) {
249
226
result =
250
- "[" +
251
- strictconcat ( string r , int part |
252
- r = printEquivalentCharClass ( range , part )
253
- |
254
- r order by part
255
- ) + "]"
227
+ strictconcat ( string r , int part |
228
+ r = "[" and part = - 1 and exists ( range )
229
+ or
230
+ r = printEquivalentCharClass ( range , part )
231
+ or
232
+ r = "]" and part = parts ( range )
233
+ |
234
+ r order by part
235
+ )
256
236
}
257
237
}
258
238
259
- /** Gets a char range that is suspiciously because of `reason`. */
239
+ /** Gets a char range that is overly large because of `reason`. */
260
240
RegExpCharacterRange getABadRange ( string reason , int priority ) {
261
241
priority = 0 and
262
242
reason = "is equivalent to " + result .( OverlyWideRange ) .printEquivalent ( )
@@ -285,7 +265,6 @@ RegExpCharacterRange getABadRange(string reason, int priority) {
285
265
286
266
/** Holds if `range` matches suspiciously many characters. */
287
267
predicate problem ( RegExpCharacterRange range , string reason ) {
288
- range = getABadRange ( _, _) and
289
268
reason =
290
269
strictconcat ( string m , int priority |
291
270
range = getABadRange ( m , priority )
0 commit comments