1
1
/** Provides classes and predicates related to regex injection in Java. */
2
2
3
3
import java
4
- import semmle.code.java.dataflow.FlowSources
5
- import semmle.code.java.dataflow.TaintTracking
6
- import semmle.code.java.regex.RegexFlowConfigs
4
+ private import semmle.code.java.dataflow.DataFlow
5
+ private import semmle.code.java.frameworks.Regex
6
+ private import semmle.code.java.frameworks.apache.Lang
7
7
8
- /**
9
- * A data flow sink for untrusted user input used to construct regular expressions.
10
- */
8
+ /** A data flow sink for untrusted user input used to construct regular expressions. */
11
9
abstract class Sink extends DataFlow:: ExprNode { }
12
10
13
- /**
14
- * A sanitizer for untrusted user input used to construct regular expressions.
15
- */
11
+ /** A sanitizer for untrusted user input used to construct regular expressions. */
16
12
abstract class Sanitizer extends DataFlow:: ExprNode { }
17
13
18
- // TODO: look into further: Pattern.matcher, .pattern() and .toString() as taint steps, .split and .splitAsStream
19
- /**
20
- * A data flow sink for untrusted user input used to construct regular expressions.
21
- */
22
- private class RegexSink extends Sink {
23
- RegexSink ( ) {
14
+ private class RegexInjectionSink extends Sink {
15
+ RegexInjectionSink ( ) {
24
16
exists ( MethodAccess ma , Method m | m = ma .getMethod ( ) |
25
17
ma .getArgument ( 0 ) = this .asExpr ( ) and
26
18
(
27
- m .getDeclaringType ( ) instanceof TypeString and
28
- m .hasName ( [ "matches" , "split" , "replaceFirst" , "replaceAll" ] )
29
- or
30
- m .getDeclaringType ( ) instanceof RegexPattern and
31
- m .hasName ( [ "compile" , "matches" ] )
19
+ m instanceof StringRegexMethod or
20
+ m instanceof PatternRegexMethod
32
21
)
33
22
or
34
- m .getDeclaringType ( ) instanceof ApacheRegExUtils and
35
- (
36
- ma .getArgument ( 1 ) = this .asExpr ( ) and
37
- // only handles String param here because the other param option, Pattern, is already handled by `java.util.regex.Pattern` above
38
- m .getParameterType ( 1 ) instanceof TypeString and
39
- m .hasName ( [
40
- "removeAll" , "removeFirst" , "removePattern" , "replaceAll" , "replaceFirst" ,
41
- "replacePattern"
42
- ] )
43
- )
23
+ ma .getArgument ( 1 ) = this .asExpr ( ) and
24
+ m instanceof ApacheRegExUtilsMethod
44
25
)
45
26
}
46
27
}
47
28
48
- /**
49
- * A call to a function whose name suggests that it escapes regular
50
- * expression meta-characters.
51
- */
52
- class RegexInjectionSanitizer extends Sanitizer {
29
+ /** A call to a function which escapes regular expression meta-characters. */
30
+ private class RegexInjectionSanitizer extends Sanitizer {
53
31
RegexInjectionSanitizer ( ) {
32
+ // a function whose name suggests that it escapes regular expression meta-characters
54
33
exists ( string calleeName , string sanitize , string regexp |
55
34
calleeName = this .asExpr ( ) .( Call ) .getCallee ( ) .getName ( ) and
56
- // TODO: add test case for sanitize? I think current tests only check escape
57
- // TODO: should this be broader and only look for "escape|saniti[sz]e" and not "regexp?" as well? -- e.g. err on side of FNs?
58
35
sanitize = "(?:escape|saniti[sz]e)" and
59
36
regexp = "regexp?"
60
37
|
@@ -63,31 +40,70 @@ class RegexInjectionSanitizer extends Sanitizer {
63
40
".*)" )
64
41
)
65
42
or
66
- // adds Pattern.quote() as a sanitizer
67
- // https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#quote-java.lang.String-: "Metacharacters or escape sequences in the input sequence will be given no special meaning."
68
- // see https://rules.sonarsource.com/java/RSPEC-2631 and https://sensei.securecodewarrior.com/recipes/scw:java:regex-injection
43
+ // a call to the `Pattern.quote` method, which gives metacharacters or escape sequences no special meaning
69
44
exists ( MethodAccess ma , Method m | m = ma .getMethod ( ) |
70
- m .getDeclaringType ( ) instanceof RegexPattern and
71
- (
72
- ma .getArgument ( 0 ) = this .asExpr ( ) and
73
- m .hasName ( "quote" )
74
- )
45
+ ma .getArgument ( 0 ) = this .asExpr ( ) and
46
+ m instanceof PatternQuoteMethod
47
+ )
48
+ or
49
+ // use of Pattern.LITERAL flag with `Pattern.compile` which gives metacharacters or escape sequences no special meaning
50
+ exists ( MethodAccess ma , Method m , Field field | m = ma .getMethod ( ) |
51
+ ma .getArgument ( 0 ) = this .asExpr ( ) and
52
+ m instanceof PatternRegexMethod and
53
+ m .hasName ( "compile" ) and
54
+ // only applies when the flags argument is itself an access to the `Pattern.LITERAL` field
55
+ field instanceof PatternLiteral and
56
+ ma .getArgument ( 1 ) = field .getAnAccess ( )
75
57
)
76
58
}
77
59
}
78
60
79
- // ******** HELPER CLASSES/METHODS (MAYBE MOVE ELSEWHERE?) ********
80
- // TODO: move below to Regex.qll??
81
- /** The Java class `java.util.regex.Pattern`. */
82
- private class RegexPattern extends RefType {
83
- RegexPattern ( ) { this .hasQualifiedName ( "java.util.regex" , "Pattern" ) }
61
+ /**
62
+ * The methods of the class `java.lang.String` that take a regular expression
63
+ * as a parameter.
64
+ */
65
+ private class StringRegexMethod extends Method {
66
+ StringRegexMethod ( ) {
67
+ this .getDeclaringType ( ) instanceof TypeString and
68
+ this .hasName ( [ "matches" , "split" , "replaceFirst" , "replaceAll" ] )
69
+ }
70
+ }
71
+
72
+ /**
73
+ * The methods of the class `java.util.regex.Pattern` that take a regular
74
+ * expression as a parameter.
75
+ */
76
+ private class PatternRegexMethod extends Method {
77
+ PatternRegexMethod ( ) {
78
+ this .getDeclaringType ( ) instanceof TypeRegexPattern and
79
+ this .hasName ( [ "compile" , "matches" ] )
80
+ }
84
81
}
85
82
86
- // /** The Java class `java.util.regex.Matcher`. */
87
- // private class RegexMatcher extends RefType {
88
- // RegexMatcher() { this.hasQualifiedName("java.util.regex", "Matcher") }
89
- // }
90
- /** The Java class `org.apache.commons.lang3.RegExUtils`. */
91
- private class ApacheRegExUtils extends RefType {
92
- ApacheRegExUtils ( ) { this .hasQualifiedName ( "org.apache.commons.lang3" , "RegExUtils" ) }
83
+ /** The `quote` method of the `java.util.regex.Pattern` class. */
84
+ private class PatternQuoteMethod extends Method {
85
+ PatternQuoteMethod ( ) { this .getDeclaringType ( ) instanceof TypeRegexPattern and this .hasName ( "quote" ) }
86
+ }
87
+
88
+ /** The `LITERAL` field of the `java.util.regex.Pattern` class. */
89
+ private class PatternLiteral extends Field {
90
+ PatternLiteral ( ) {
91
+ this .getDeclaringType ( ) instanceof TypeRegexPattern and
92
+ this .hasName ( "LITERAL" )
93
+ }
94
+ }
95
+
96
+ /**
97
+ * The methods of the class `org.apache.commons.lang3.RegExUtils` that take
98
+ * a regular expression of type `String` as a parameter.
99
+ */
100
+ private class ApacheRegExUtilsMethod extends Method {
101
+ ApacheRegExUtilsMethod ( ) {
102
+ this .getDeclaringType ( ) instanceof TypeApacheRegExUtils and
103
+ // only handles String param here because the other param option, Pattern, is already handled by `java.util.regex.Pattern`
104
+ this .getParameterType ( 1 ) instanceof TypeString and
105
+ this .hasName ( [
106
+ "removeAll" , "removeFirst" , "removePattern" , "replaceAll" , "replaceFirst" , "replacePattern"
107
+ ] )
108
+ }
93
109
}
0 commit comments