|
| 1 | +/** |
| 2 | + * Finds usage of the `String` regex methods `matches`, `replaceAll`, `replaceFirst` and `split` |
| 3 | + * where the intention might have been to treat the argument literally, but it is instead |
| 4 | + * interpreted as a regex pattern. |
| 5 | + * |
| 6 | + * The method `java.util.regex.Pattern.quote` should be used to make sure none of the characters |
| 7 | + * in the argument are treated as special regex pattern characters. |
| 8 | + * |
| 9 | + * @kind problem |
| 10 | + * @precision low |
| 11 | + */ |
| 12 | + |
| 13 | +// TODO: Improve precision |
| 14 | +import java |
| 15 | + |
/**
 * A method whose declaring type is `TypeString` and whose name is one of
 * `matches`, `replaceAll`, `replaceFirst` or `split`.
 */
class StringRegexMethod extends Method {
  StringRegexMethod() {
    this.getName() = ["matches", "replaceAll", "replaceFirst", "split"] and
    this.getDeclaringType() instanceof TypeString
  }
}
| 22 | + |
/**
 * A method named `compile` or `matches` that is declared on
 * `java.util.regex.Pattern`.
 */
class RegexPatternMethod extends Method {
  RegexPatternMethod() {
    this.getName() = ["compile", "matches"] and
    this.getDeclaringType().hasQualifiedName("java.util.regex", "Pattern")
  }
}
| 29 | + |
/**
 * Holds if `varRead` is none of the following:
 * - the first argument of a call to a `StringRegexMethod`,
 * - an operand of an `EqualityTest`,
 * - the qualifier or the first argument of a call to an `EqualsMethod`.
 */
predicate isRelevantNonRegexUsage(RValue varRead) {
  // Not passed as the first argument to another `String` regex method
  not varRead =
    any(MethodAccess regexCall | regexCall.getMethod() instanceof StringRegexMethod)
        .getArgument(0) and
  // Not compared with `==` or `!=`
  not varRead = any(EqualityTest cmp).getAnOperand() and
  // Not the receiver or the argument of an `equals` call
  not exists(MethodAccess eqCall |
    eqCall.getMethod() instanceof EqualsMethod and
    varRead = [eqCall.getQualifier(), eqCall.getArgument(0)]
  )
}
| 41 | + |
// Reports a read of `var` that is passed as the first argument to a `String` regex method
// while another read of the same variable is a "relevant non-regex usage"
// (see `isRelevantNonRegexUsage`).
from Variable var, RValue regexUsage, MethodAccess stringRegexCall, RValue otherUsage
where
  // `regexUsage` is an access to `var` used as the first argument of a `String` regex method call
  stringRegexCall.getMethod() instanceof StringRegexMethod and
  stringRegexCall.getArgument(0) = regexUsage and
  regexUsage = var.getAnAccess() and
  // Ignore if var name indicates intentional usage. The name is lower-cased first so that
  // constant-style names such as `SPLIT_REGEX` or `DATE_PATTERN` are recognized as well.
  not var.getName().toLowerCase().matches(["%regex%", "%pattern%"]) and
  // Ignore if explicitly catching PatternSyntaxException: the call is enclosed by the block
  // of a try statement with a catch clause for that exception type
  not exists(TryStmt tryStmt |
    tryStmt.getBlock() = stringRegexCall.getAnEnclosingStmt() and
    tryStmt
        .getACatchClause()
        .getACaughtType()
        .hasQualifiedName("java.util.regex", "PatternSyntaxException")
  ) and
  // There is a different access to `var` which is a relevant non-regex usage
  otherUsage = var.getAnAccess() and
  otherUsage != regexUsage and
  isRelevantNonRegexUsage(otherUsage) and
  // Ignore if var is also explicitly used for Pattern method
  not exists(MethodAccess patternCall |
    patternCall.getMethod() instanceof RegexPatternMethod and
    patternCall.getArgument(0) = var.getAnAccess()
  ) and
  // If var has constant value, that value must contain special regex character
  (
    var.getInitializer() instanceof CompileTimeConstantExpr
    implies
    // Contains character which has special regex meaning
    exists(
      var.getInitializer()
          .(CompileTimeConstantExpr)
          .getStringValue()
          .indexOf(["\\", "[", "]", ".", "^", "$", "?", "*", "+", "{", "}", "|", "(", ")"])
    )
  )
select regexUsage,
  "Possibly accidental interpretation as Regex pattern because value is used $@ not as Regex pattern",
  otherUsage, "here"
0 commit comments