Skip to content

Commit b3601b1

Browse files
authored
Merge pull request #18946 from paldepind/rust-regex-injection
Rust: Add regular expression injection query
2 parents 269f9fa + 1e0b78e commit b3601b1

File tree

22 files changed

+354
-47
lines changed

22 files changed

+354
-47
lines changed

rust/ql/integration-tests/hello-project/summary.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
| Macro calls - resolved | 2 |
1515
| Macro calls - total | 2 |
1616
| Macro calls - unresolved | 0 |
17-
| Taint edges - number of edges | 1670 |
17+
| Taint edges - number of edges | 1671 |
1818
| Taint reach - nodes tainted | 0 |
1919
| Taint reach - per million nodes | 0 |
2020
| Taint sinks - cryptographic operations | 0 |

rust/ql/integration-tests/hello-workspace/summary.cargo.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
| Macro calls - resolved | 2 |
1515
| Macro calls - total | 2 |
1616
| Macro calls - unresolved | 0 |
17-
| Taint edges - number of edges | 1670 |
17+
| Taint edges - number of edges | 1671 |
1818
| Taint reach - nodes tainted | 0 |
1919
| Taint reach - per million nodes | 0 |
2020
| Taint sinks - cryptographic operations | 0 |

rust/ql/integration-tests/hello-workspace/summary.rust-project.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
| Macro calls - resolved | 2 |
1515
| Macro calls - total | 2 |
1616
| Macro calls - unresolved | 0 |
17-
| Taint edges - number of edges | 1670 |
17+
| Taint edges - number of edges | 1671 |
1818
| Taint reach - nodes tainted | 0 |
1919
| Taint reach - per million nodes | 0 |
2020
| Taint sinks - cryptographic operations | 0 |
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Models for the `regex` crate.
2+
extensions:
3+
- addsTo:
4+
pack: codeql/rust-all
5+
extensible: summaryModel
6+
data:
7+
- ["repo:https://github.com/rust-lang/regex:regex", "crate::escape", "Argument[0].Reference", "ReturnValue", "taint", "manual"]
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/**
2+
* Provides classes and predicates to reason about regular expression injection
3+
* vulnerabilities.
4+
*/
5+
6+
private import codeql.util.Unit
7+
private import rust
8+
private import codeql.rust.dataflow.DataFlow
9+
private import codeql.rust.controlflow.CfgNodes
10+
private import codeql.rust.dataflow.FlowSink
11+
12+
/**
13+
* A data flow sink for regular expression injection vulnerabilities.
14+
*/
15+
abstract class RegexInjectionSink extends DataFlow::Node { }
16+
17+
/**
18+
* A barrier for regular expression injection vulnerabilities.
19+
*/
20+
abstract class RegexInjectionBarrier extends DataFlow::Node { }
21+
22+
/** A sink for `a` in `Regex::new(a)` when `a` is not a literal. */
23+
private class NewRegexInjectionSink extends RegexInjectionSink {
24+
NewRegexInjectionSink() {
25+
exists(CallExprCfgNode call, PathExpr path |
26+
path = call.getFunction().getExpr() and
27+
path.getResolvedCrateOrigin() = "repo:https://github.com/rust-lang/regex:regex" and
28+
path.getResolvedPath() = "<crate::regex::string::Regex>::new" and
29+
this.asExpr() = call.getArgument(0) and
30+
not this.asExpr() instanceof LiteralExprCfgNode
31+
)
32+
}
33+
}
34+
35+
private class MadRegexInjectionSink extends RegexInjectionSink {
36+
MadRegexInjectionSink() { sinkNode(this, "regex-use") }
37+
}
38+
39+
/**
40+
* A unit class for adding additional flow steps.
41+
*/
42+
class RegexInjectionAdditionalFlowStep extends Unit {
43+
/**
44+
* Holds if the step from `node1` to `node2` should be considered a flow
45+
* step for paths related to regular expression injection vulnerabilities.
46+
*/
47+
abstract predicate step(DataFlow::Node node1, DataFlow::Node node2);
48+
}
49+
50+
/**
51+
* An escape barrier for regular expression injection vulnerabilities.
52+
*/
53+
private class RegexInjectionDefaultBarrier extends RegexInjectionBarrier {
54+
RegexInjectionDefaultBarrier() {
55+
// A barrier is any call to a function named `escape`, in particular this
56+
// makes calls to `regex::escape` a barrier.
57+
this.asExpr()
58+
.getExpr()
59+
.(CallExpr)
60+
.getFunction()
61+
.(PathExpr)
62+
.getPath()
63+
.getPart()
64+
.getNameRef()
65+
.getText() = "escape"
66+
}
67+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
Constructing a regular expression with unsanitized user input can be dangerous.
9+
A malicious user may be able to modify the meaning of the expression, causing it
10+
to match unexpected strings and construct large regular expressions by using
11+
counted repetitions.
12+
</p>
13+
</overview>
14+
15+
<recommendation>
16+
<p>
17+
Before embedding user input into a regular expression, escape the input string
18+
using a function such as <a
19+
href="https://docs.rs/regex/latest/regex/fn.escape.html">regex::escape</a> to
20+
escape meta-characters that have special meaning.
21+
</p>
22+
<p>
23+
If purposefully supporting user supplied regular expressions, then use <a
24+
href="https://docs.rs/regex/latest/regex/struct.RegexBuilder.html#method.size_limit">RegexBuilder::size_limit</a>
25+
to limit the pattern size so that it is no larger than necessary.
26+
</p>
27+
</recommendation>
28+
29+
<example>
30+
<p>
31+
The following example constructs a regular expressions from the user input
32+
<code>key</code> without escaping it first.
33+
</p>
34+
35+
<sample src="RegexInjectionBad.rs" />
36+
37+
<p>
38+
The regular expression is intended to match strings starting with
39+
<code>"property"</code> such as <code>"property:foo=bar"</code>. However, a
40+
malicious user might inject the regular expression <code>".*^|key"</code> and
41+
unexpectedly cause strings such as <code>"key=secret"</code> to match.
42+
</p>
43+
<p>
44+
If user input is used to construct a regular expression, it should be escaped
45+
first. This ensures that malicious users cannot insert characters that have special
46+
meanings in regular expressions.
47+
</p>
48+
<sample src="RegexInjectionGood.rs" />
49+
</example>
50+
51+
<references>
52+
<li>
53+
<code>regex</code> crate documentation: <a href="https://docs.rs/regex/latest/regex/index.html#untrusted-patterns">Untrusted patterns</a>.
54+
</li>
55+
</references>
56+
</qhelp>
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @name Regular expression injection
3+
* @description User input should not be used in regular expressions without first being
4+
* escaped, otherwise a malicious user may be able to inject an expression that
5+
* could modify the meaning of the expression, causing it to match unexpected
6+
* strings.
7+
* @kind path-problem
8+
* @problem.severity error
9+
* @security-severity 7.8
10+
* @precision high
11+
* @id rust/regex-injection
12+
* @tags security
13+
* external/cwe/cwe-020
14+
* external/cwe/cwe-074
15+
*/
16+
17+
private import rust
18+
private import codeql.rust.dataflow.DataFlow
19+
private import codeql.rust.dataflow.TaintTracking
20+
private import codeql.rust.Concepts
21+
private import codeql.rust.security.regex.RegexInjectionExtensions
22+
23+
/**
24+
* A taint configuration for detecting regular expression injection vulnerabilities.
25+
*/
26+
module RegexInjectionConfig implements DataFlow::ConfigSig {
27+
predicate isSource(DataFlow::Node source) { source instanceof ActiveThreatModelSource }
28+
29+
predicate isSink(DataFlow::Node sink) { sink instanceof RegexInjectionSink }
30+
31+
predicate isBarrier(DataFlow::Node barrier) { barrier instanceof RegexInjectionBarrier }
32+
33+
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
34+
any(RegexInjectionAdditionalFlowStep s).step(nodeFrom, nodeTo)
35+
}
36+
}
37+
38+
/**
39+
* Detect taint flow of tainted data that reaches a regular expression sink.
40+
*/
41+
module RegexInjectionFlow = TaintTracking::Global<RegexInjectionConfig>;
42+
43+
private import RegexInjectionFlow::PathGraph
44+
45+
from RegexInjectionFlow::PathNode sourceNode, RegexInjectionFlow::PathNode sinkNode
46+
where RegexInjectionFlow::flowPath(sourceNode, sinkNode)
47+
select sinkNode.getNode(), sourceNode, sinkNode,
48+
"This regular expression is constructed from a $@.", sourceNode.getNode(), "user-provided value"
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
use regex::Regex;
2+
3+
fn get_value<'h>(key: &str, property: &'h str) -> Option<&'h str> {
4+
// BAD: User provided `key` is interpolated into the regular expression.
5+
let pattern = format!(r"^property:{key}=(.*)$");
6+
let re = Regex::new(&pattern).unwrap();
7+
re.captures(property)?.get(1).map(|m| m.as_str())
8+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
use regex::{escape, Regex};
2+
3+
fn get_value<'h>(key: &str, property: &'h str) -> option<&'h str> {
4+
// GOOD: User input is escaped before being used in the regular expression.
5+
let escaped_key = escape(key);
6+
let pattern = format!(r"^property:{escaped_key}=(.*)$");
7+
let re = regex::new(&pattern).unwrap();
8+
re.captures(property)?.get(1).map(|m| m.as_str())
9+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
extensions:
2+
- addsTo:
3+
pack: codeql/threat-models
4+
extensible: threatModelConfiguration
5+
data:
6+
- ["local", true, 0]

0 commit comments

Comments
 (0)