Skip to content

Commit 72986e1

Browse files
committed
Python: Add some comments on the boolean sweep
pattern
1 parent 4ca0ee8 commit 72986e1

File tree

1 file changed

+28
-4
lines changed

1 file changed

+28
-4
lines changed

python/ql/src/semmle/python/regex.qll

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,17 @@ abstract class RegexString extends Expr {
127127
result = this.(Unicode).getText()
128128
}
129129

130-
/** result is true for those start chars that actually mark a start of a char set. */
130+
/**
131+
* Helper predicate for `char_set_start(int start, int end)`.
132+
*
133+
* In order to identify left brackets ('[') which actually start a character class,
134+
* we perform a left to right scan of the string.
135+
*
136+
* To avoid negative recursion we return a boolean. See `escaping`,
137+
* the helper for `escapingChar`, for a clean use of this pattern.
138+
*
139+
* result is true for those start chars that actually mark a start of a char set.
140+
*/
131141
boolean char_set_start(int pos) {
132142
exists(int index |
133143
// is opening bracket
@@ -176,9 +186,9 @@ abstract class RegexString extends Expr {
176186
)
177187
}
178188

179-
/**
180-
* Helper predicate for chars that could be character-set delimiters.
181-
* Holds if the (non-escaped) char at `pos` in the string, is the (one-based) `index` occurrence of a bracket (`[` or `]`) in the string.
189+
/**
190+
* Helper predicate for chars that could be character-set delimiters.
191+
* Holds if the (non-escaped) char at `pos` in the string, is the (one-based) `index` occurrence of a bracket (`[` or `]`) in the string.
182192
* Result is `true` if the char is `[`, and `false` if the char is `]`.
183193
*/
184194
boolean char_set_delimiter(int index, int pos) {
@@ -267,6 +277,13 @@ abstract class RegexString extends Expr {
267277
)
268278
}
269279

280+
/**
281+
* Helper predicate for `charRange`.
282+
* We can determine where character ranges end by a left to right sweep.
283+
*
284+
* To avoid negative recursion we return a boolean. See `escaping`,
285+
* the helper for `escapingChar`, for a clean use of this pattern.
286+
*/
270287
private boolean charRangeEnd(int charset_start, int index) {
271288
this.char_set_token(charset_start, index, _, _) and
272289
(
@@ -290,8 +307,15 @@ abstract class RegexString extends Expr {
290307
)
291308
}
292309

310+
/** Holds if the character at `pos` is a "\" that is actually escaping what comes after. */
293311
// Holds exactly when the boolean-valued helper `escaping` yields `true` for `pos`.
predicate escapingChar(int pos) { this.escaping(pos) = true }
294312

313+
/**
314+
* Helper predicate for `escapingChar`.
315+
* In order to avoid negative recursion, we return a boolean.
316+
* This way, we can refer to `escaping(pos - 1).booleanNot()`
317+
* rather than to a negated version of `escaping(pos)`.
318+
*/
295319
private boolean escaping(int pos) {
296320
pos = -1 and result = false
297321
or

0 commit comments

Comments
 (0)