Skip to content

Commit b28673a

Browse files
committed
Add some Regex queries
1 parent dc3389f commit b28673a

File tree

2 files changed

+74
-0
lines changed

2 files changed

+74
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/**
2+
* Finds Regex patterns containing `(...)` which was most likely intended to be
3+
* matched literally instead of being treated as a group.
4+
*
5+
* For example in the pattern `Action ".*" failed (cancelled)` the part `(cancelled)`
6+
* was most likely supposed to be matched literally, but it is actually interpreted as
7+
* a group and therefore `(` and `)` are not expected in the input. The `(` and `)`
8+
* should be escaped with a `\` in this case.
9+
*
10+
* @id todo
11+
* @kind problem
12+
*/
13+
14+
import java
15+
// Uses alias `re` to avoid conflicting declarations
16+
import semmle.code.java.regex.RegexTreeView as re
17+
18+
/**
 * A `RegExpNormalChar` term whose raw source text does not contain a backslash.
 */
class LiteralRegExpChar extends re::RegExpNormalChar {
  LiteralRegExpChar() {
    // The `RegExpNormalChar` documentation says it also matches character classes;
    // those are left out here by rejecting any term whose raw text contains a `\`
    not exists(int backslashPos | backslashPos = this.getRawValue().indexOf("\\"))
  }
}
24+
25+
// Note: This does not match all Regex patterns, see
26+
// https://github.com/github/codeql/blob/codeql-cli/v2.15.5/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll#L161-L162
27+
from re::RegExpGroup candidate
where
  // A quantified group (e.g. `(ab)+`) is most likely intentional, so it is skipped
  not candidate.getParent() instanceof re::RegExpQuantifier and
  // Special group syntax (non-capturing, lookahead, ...) suggests the group is intentional
  not candidate.getRawValue().matches("(?%") and
  // Every child must be a plain literal; otherwise the captured group content might be
  // used somewhere
  not exists(re::RegExpTerm inner |
    inner = candidate.getAChild() and
    not inner instanceof LiteralRegExpChar
  )
// TODO: Maybe check for parse errors to reduce false-positives, with `not candidate.getRegex().failedToParse(_)`?
select candidate, "Potential accidental group"
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Finds Regex patterns with a character class which contains the same character multiple
3+
* times. This is redundant and might indicate that the string was not supposed to represent
4+
* a character class.
5+
*
6+
* For example in the pattern `[ERROR] some message.*` the part `[ERROR]` is actually a
7+
* character class which matches any of these characters. The `[` and `]` should be escaped
8+
* with a `\` in this case.
9+
*
10+
* Note that a `|` _inside a character class_ does not represent an 'either' and is instead
11+
* matched literally. E.g. the pattern `[ab|cd|ef]` also matches the string `"|"`.
12+
*
13+
* This issue is also reported by IntelliJ as `RegExpDuplicateCharacterInClass`.
14+
*
15+
* @id todo
16+
* @kind problem
17+
*/
18+
19+
import java
20+
// Uses alias `re` to avoid conflicting declarations
21+
import semmle.code.java.regex.RegexTreeView as re
22+
23+
// Note: This does not match all Regex patterns, see
24+
// https://github.com/github/codeql/blob/codeql-cli/v2.15.5/java/ql/lib/semmle/code/java/regex/RegexFlowConfigs.qll#L161-L162
25+
from
  re::RegExpCharacterClass cls, re::RegExpNormalChar first, re::RegExpNormalChar second,
  int firstIdx, int secondIdx, string raw
where
  // Pick two children of the same character class; the strict ordering of their indices
  // prevents the same pair from being reported a second time with the order reversed
  first = cls.getChild(firstIdx) and
  second = cls.getChild(secondIdx) and
  firstIdx < secondIdx and
  // Both children must have the same raw text
  raw = first.getRawValue() and
  raw = second.getRawValue() and
  // Ignore false positives for `&&` (two directly adjacent `&`), which does not seem to be
  // recognized by the CodeQL Regex library yet
  (raw != "&" or secondIdx != firstIdx + 1)
// TODO: Maybe check for parse errors to reduce false-positives, with `not cls.getRegex().failedToParse(_)`?
select cls, "Contains '" + raw + "' twice $@ and $@", first, "here", second, "here"

0 commit comments

Comments
 (0)