Skip to content

Commit 5044f89

Browse files
committed
Ruby/Python: re-introduce normalCharacterSequence
1 parent 9d9abaf commit 5044f89

File tree

5 files changed

+30
-24
lines changed

5 files changed

+30
-24
lines changed

python/ql/lib/semmle/python/RegexTreeView.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ newtype TRegExpParent =
4040
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
4141
/** A normal character */
4242
TRegExpNormalChar(Regex re, int start, int end) {
43-
re.simpleCharacterSequence(start, end)
43+
re.normalCharacterSequence(start, end)
4444
or
4545
re.escapedCharacter(start, end) and
4646
not re.specialCharacter(start, end, _)

python/ql/lib/semmle/python/regex.qll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ abstract class RegexString extends Expr {
427427
}
428428

429429
predicate normalCharacter(int start, int end) {
430+
end = start + 1 and
430431
this.character(start, end) and
431432
not this.specialCharacter(start, end, _)
432433
}
@@ -447,16 +448,16 @@ abstract class RegexString extends Expr {
447448
}
448449

449450
/**
450-
* A sequence of 'simple' characters.
451+
* Holds if the range [start:end) consists of only 'normal' characters.
451452
*/
452-
predicate simpleCharacterSequence(int start, int end) {
453-
// a simple character inside a character set is interpreted on its own
454-
this.simpleCharacter(start, end) and
453+
predicate normalCharacterSequence(int start, int end) {
454+
// a normal character inside a character set is interpreted on its own
455+
this.normalCharacter(start, end) and
455456
this.inCharSet(start)
456457
or
457-
// a maximal run of simple characters is considered as one constant
458+
// a maximal run of normal characters is considered as one constant
458459
exists(int s, int e |
459-
e = max(int i | simpleCharacterRun(s, i)) and
460+
e = max(int i | this.normalCharacterRun(s, i)) and
460461
not this.inCharSet(s)
461462
|
462463
// 'abc' can be considered one constant, but
@@ -474,17 +475,17 @@ abstract class RegexString extends Expr {
474475
)
475476
}
476477

477-
private predicate simpleCharacterRun(int start, int end) {
478+
private predicate normalCharacterRun(int start, int end) {
478479
(
479-
simpleCharacterRun(start, end - 1)
480+
this.normalCharacterRun(start, end - 1)
480481
or
481-
start = end - 1 and not normalCharacter(start - 1, start)
482+
start = end - 1 and not this.normalCharacter(start - 1, start)
482483
) and
483-
this.simpleCharacter(end - 1, end)
484+
this.normalCharacter(end - 1, end)
484485
}
485486

486487
private predicate characterItem(int start, int end) {
487-
this.simpleCharacterSequence(start, end) or
488+
this.normalCharacterSequence(start, end) or
488489
this.escapedCharacter(start, end) or
489490
this.specialCharacter(start, end, _)
490491
}

python/ql/test/library-tests/regex/Regex.ql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ predicate part(Regex r, int start, int end, string kind) {
66
or
77
r.normalCharacter(start, end) and kind = "char"
88
or
9+
r.escapedCharacter(start, end) and
10+
kind = "char" and
11+
not r.specialCharacter(start, end, _)
12+
or
913
r.specialCharacter(start, end, kind)
1014
or
1115
r.sequence(start, end) and kind = "sequence"

ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ class RegExp extends AST::RegExpLiteral {
382382
}
383383

384384
predicate normalCharacter(int start, int end) {
385+
end = start + 1 and
385386
this.character(start, end) and
386387
not this.specialCharacter(start, end, _)
387388
}
@@ -402,16 +403,16 @@ class RegExp extends AST::RegExpLiteral {
402403
}
403404

404405
/**
405-
* A sequence of 'simple' characters.
406+
* Holds if the range [start:end) consists of only 'normal' characters.
406407
*/
407-
predicate simpleCharacterSequence(int start, int end) {
408-
// a simple character inside a character set is interpreted on its own
409-
this.simpleCharacter(start, end) and
408+
predicate normalCharacterSequence(int start, int end) {
409+
// a normal character inside a character set is interpreted on its own
410+
this.normalCharacter(start, end) and
410411
this.inCharSet(start)
411412
or
412-
// a maximal run of simple characters is considered as one constant
413+
// a maximal run of normal characters is considered as one constant
413414
exists(int s, int e |
414-
e = max(int i | simpleCharacterRun(s, i)) and
415+
e = max(int i | this.normalCharacterRun(s, i)) and
415416
not this.inCharSet(s)
416417
|
417418
// 'abc' can be considered one constant, but
@@ -429,17 +430,17 @@ class RegExp extends AST::RegExpLiteral {
429430
)
430431
}
431432

432-
private predicate simpleCharacterRun(int start, int end) {
433+
private predicate normalCharacterRun(int start, int end) {
433434
(
434-
simpleCharacterRun(start, end - 1)
435+
this.normalCharacterRun(start, end - 1)
435436
or
436-
start = end - 1 and not normalCharacter(start - 1, start)
437+
start = end - 1 and not this.normalCharacter(start - 1, start)
437438
) and
438-
this.simpleCharacter(end - 1, end)
439+
this.normalCharacter(end - 1, end)
439440
}
440441

441442
private predicate characterItem(int start, int end) {
442-
this.simpleCharacterSequence(start, end) or
443+
this.normalCharacterSequence(start, end) or
443444
this.escapedCharacter(start, end) or
444445
this.specialCharacter(start, end, _)
445446
}

ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ newtype TRegExpParent =
229229
TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
230230
TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
231231
TRegExpNormalChar(RegExp re, int start, int end) {
232-
re.simpleCharacterSequence(start, end)
232+
re.normalCharacterSequence(start, end)
233233
or
234234
re.escapedCharacter(start, end) and
235235
not re.specialCharacter(start, end, _)

0 commit comments

Comments
 (0)