Skip to content

Commit e5d23b2

Browse files
authored
Merge pull request github#3801 from RasmusWL/python-3521-revived
Python: Add support for detecting XSLT Injection (github#3521 revived)
2 parents 9d8052a + b164f26 commit e5d23b2

File tree

13 files changed

+398
-0
lines changed

13 files changed

+398
-0
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
2+
<qhelp>
3+
<overview>
4+
<p>
5+
Processing an unvalidated XSL stylesheet can allow an attacker to change the structure and contents of the resultant XML, include arbitrary files from the file system, or execute arbitrary code.
6+
</p>
7+
</overview>
8+
<recommendation>
9+
<p>
10+
This vulnerability can be prevented by not allowing untrusted user input to be passed as an XSL stylesheet.
11+
If the application logic necessiates processing untrusted XSL stylesheets, the input should be properly filtered and sanitized before use.
12+
</p>
13+
</recommendation>
14+
<example>
15+
<p>In the example below, the XSL stylesheet is controlled by the user and hence leads to a vulnerability.</p>
16+
<sample src="xslt.py" />
17+
</example>
18+
</qhelp>
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* @name XSLT query built from user-controlled sources
3+
* @description Building a XSLT query from user-controlled sources is vulnerable to insertion of
4+
* malicious XSLT code by the user.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @precision high
8+
* @id py/xslt-injection
9+
* @tags security
10+
* external/cwe/cwe-643
11+
*/
12+
13+
import python
14+
import semmle.python.security.Paths
15+
/* Sources */
16+
import semmle.python.web.HttpRequest
17+
/* Sinks */
18+
import experimental.semmle.python.security.injection.XSLT
19+
20+
class XSLTInjectionConfiguration extends TaintTracking::Configuration {
21+
XSLTInjectionConfiguration() { this = "XSLT injection configuration" }
22+
23+
override predicate isSource(TaintTracking::Source source) {
24+
source instanceof HttpRequestTaintSource
25+
}
26+
27+
override predicate isSink(TaintTracking::Sink sink) {
28+
sink instanceof XSLTInjection::XSLTInjectionSink
29+
}
30+
}
31+
32+
from XSLTInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
33+
where config.hasFlowPath(src, sink)
34+
select sink.getSink(), src, sink, "This XSLT query depends on $@.", src.getSource(),
35+
"a user-provided value"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from lxml import etree
2+
from io import StringIO
3+
from flask import Flask, request
4+
5+
app = Flask(__name__)
6+
7+
8+
@app.route("/xslt")
9+
def bad():
10+
xsltQuery = request.args.get('xml', '')
11+
xslt_root = etree.XML(xsltQuery)
12+
f = StringIO('<foo><bar></bar></foo>')
13+
tree = etree.parse(f)
14+
result_tree = tree.xslt(xslt_root) # Not OK
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/**
2+
* Provides class and predicates to track external data that
3+
* may represent malicious XSLT query objects.
4+
*
5+
* This module is intended to be imported into a taint-tracking query
6+
* to extend `TaintKind` and `TaintSink`.
7+
*/
8+
9+
import python
10+
import semmle.python.dataflow.TaintTracking
11+
import semmle.python.web.HttpRequest
12+
13+
/** Models XSLT Injection related classes and functions */
14+
module XSLTInjection {
15+
/** Returns a class value which refers to `lxml.etree` */
16+
Value etree() { result = Value::named("lxml.etree") }
17+
18+
/** A generic taint sink that is vulnerable to XSLT injection. */
19+
abstract class XSLTInjectionSink extends TaintSink { }
20+
21+
/**
22+
* A kind of "taint", representing an untrusted XML string
23+
*/
24+
private class ExternalXmlStringKind extends ExternalStringKind {
25+
ExternalXmlStringKind() { this = "etree.XML string" }
26+
27+
override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
28+
etreeXML(fromnode, tonode) and result instanceof ExternalXmlKind
29+
or
30+
etreeFromStringList(fromnode, tonode) and result instanceof ExternalXmlKind
31+
or
32+
etreeFromString(fromnode, tonode) and result instanceof ExternalXmlKind
33+
}
34+
}
35+
36+
/**
37+
* A kind of "taint", representing a XML encoded string
38+
*/
39+
class ExternalXmlKind extends TaintKind {
40+
ExternalXmlKind() { this = "lxml etree xml" }
41+
}
42+
43+
private predicate etreeXML(ControlFlowNode fromnode, CallNode tonode) {
44+
// etree.XML("<xmlContent>")
45+
exists(CallNode call | call.getFunction().(AttrNode).getObject("XML").pointsTo(etree()) |
46+
call.getArg(0) = fromnode and
47+
call = tonode
48+
)
49+
}
50+
51+
private predicate etreeFromString(ControlFlowNode fromnode, CallNode tonode) {
52+
// etree.fromstring(text, parser=None)
53+
exists(CallNode call | call.getFunction().(AttrNode).getObject("fromstring").pointsTo(etree()) |
54+
call.getArg(0) = fromnode and
55+
call = tonode
56+
)
57+
}
58+
59+
private predicate etreeFromStringList(ControlFlowNode fromnode, CallNode tonode) {
60+
// etree.fromstringlist(strings, parser=None)
61+
exists(CallNode call |
62+
call.getFunction().(AttrNode).getObject("fromstringlist").pointsTo(etree())
63+
|
64+
call.getArg(0) = fromnode and
65+
call = tonode
66+
)
67+
}
68+
69+
/**
70+
* A Sink representing an argument to the `etree.XSLT` call.
71+
*
72+
* from lxml import etree
73+
* root = etree.XML("<xmlContent>")
74+
* find_text = etree.XSLT("`sink`")
75+
*/
76+
private class EtreeXSLTArgument extends XSLTInjectionSink {
77+
override string toString() { result = "lxml.etree.XSLT" }
78+
79+
EtreeXSLTArgument() {
80+
exists(CallNode call | call.getFunction().(AttrNode).getObject("XSLT").pointsTo(etree()) |
81+
call.getArg(0) = this
82+
)
83+
}
84+
85+
override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
86+
}
87+
88+
/**
89+
* A Sink representing an argument to the `XSLT` call to a parsed xml document.
90+
*
91+
* from lxml import etree
92+
* from io import StringIO
93+
* `sink` = etree.XML(xsltQuery)
94+
* tree = etree.parse(f)
95+
* result_tree = tree.xslt(`sink`)
96+
*/
97+
private class ParseXSLTArgument extends XSLTInjectionSink {
98+
override string toString() { result = "lxml.etree.parse.xslt" }
99+
100+
ParseXSLTArgument() {
101+
exists(
102+
CallNode parseCall, CallNode xsltCall, ControlFlowNode obj, Variable var, AssignStmt assign
103+
|
104+
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
105+
assign.getValue().(Call).getAFlowNode() = parseCall and
106+
xsltCall.getFunction().(AttrNode).getObject("xslt") = obj and
107+
var.getAUse() = obj and
108+
assign.getATarget() = var.getAStore() and
109+
xsltCall.getArg(0) = this
110+
)
111+
}
112+
113+
override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
114+
}
115+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
edges
2+
| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string |
3+
| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string |
4+
| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string |
5+
| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string |
6+
| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml |
7+
| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml |
8+
| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml |
9+
| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml |
10+
| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string |
11+
| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string |
12+
| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string |
13+
| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string |
14+
| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml |
15+
| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml |
16+
| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml |
17+
| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml |
18+
| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string |
19+
| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string |
20+
| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string |
21+
| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string |
22+
| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml |
23+
| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml |
24+
| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml |
25+
| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml |
26+
| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string |
27+
| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string |
28+
| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string |
29+
| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string |
30+
| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml |
31+
| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml |
32+
| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml |
33+
| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml |
34+
| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string |
35+
| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string |
36+
| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string |
37+
| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string |
38+
| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml |
39+
| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml |
40+
| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml |
41+
| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml |
42+
#select
43+
| xslt.py:14:29:14:37 | xslt_root | xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:14:29:14:37 | lxml etree xml | This XSLT query depends on $@. | xslt.py:10:17:10:28 | Attribute | a user-provided value |
44+
| xsltInjection.py:12:28:12:36 | xslt_root | xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:12:28:12:36 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:10:17:10:28 | Attribute | a user-provided value |
45+
| xsltInjection.py:21:29:21:37 | xslt_root | xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:21:29:21:37 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:17:17:17:28 | Attribute | a user-provided value |
46+
| xsltInjection.py:31:24:31:32 | xslt_root | xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:31:24:31:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:26:17:26:28 | Attribute | a user-provided value |
47+
| xsltInjection.py:40:24:40:32 | xslt_root | xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:40:24:40:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:35:17:35:28 | Attribute | a user-provided value |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/CWE-643/Xslt.ql
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
| xslt.py:14:29:14:37 | lxml.etree.parse.xslt | lxml etree xml |
2+
| xsltInjection.py:12:28:12:36 | lxml.etree.XSLT | lxml etree xml |
3+
| xsltInjection.py:21:29:21:37 | lxml.etree.parse.xslt | lxml etree xml |
4+
| xsltInjection.py:31:24:31:32 | lxml.etree.parse.xslt | lxml etree xml |
5+
| xsltInjection.py:40:24:40:32 | lxml.etree.parse.xslt | lxml etree xml |
6+
| xsltInjection.py:50:24:50:32 | lxml.etree.parse.xslt | lxml etree xml |
7+
| xsltInjection.py:60:24:60:32 | lxml.etree.parse.xslt | lxml etree xml |
8+
| xsltInjection.py:69:24:69:32 | lxml.etree.parse.xslt | lxml etree xml |
9+
| xsltInjection.py:79:24:79:32 | lxml.etree.parse.xslt | lxml etree xml |
10+
| xsltSinks.py:17:28:17:36 | lxml.etree.XSLT | lxml etree xml |
11+
| xsltSinks.py:30:29:30:37 | lxml.etree.parse.xslt | lxml etree xml |
12+
| xsltSinks.py:44:24:44:32 | lxml.etree.parse.xslt | lxml etree xml |
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import python
2+
import experimental.semmle.python.security.injection.XSLT
3+
4+
from XSLTInjection::XSLTInjectionSink sink, TaintKind kind
5+
where sink.sinks(kind)
6+
select sink, kind
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: -p ../../query-tests/Security/lib/ --max-import-depth=3
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from lxml import etree
2+
from io import StringIO
3+
from flask import Flask, request
4+
5+
app = Flask(__name__)
6+
7+
8+
@app.route("/xslt")
9+
def bad():
10+
xsltQuery = request.args.get('xml', '')
11+
xslt_root = etree.XML(xsltQuery)
12+
f = StringIO('<foo><bar></bar></foo>')
13+
tree = etree.parse(f)
14+
result_tree = tree.xslt(xslt_root) # Not OK

0 commit comments

Comments
 (0)