Skip to content

Commit 0b36cd4

Browse files
authored
Merge pull request github#3522 from porcupineyhairs/pythonXpath
Python : Add Xpath injection query
2 parents 06a5242 + a519132 commit 0b36cd4

File tree

17 files changed

+446
-0
lines changed

17 files changed

+446
-0
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
2+
<qhelp>
3+
<overview>
4+
<p>
5+
Using user-supplied information to construct an XPath query for XML data can
6+
result in an XPath injection flaw. By sending intentionally malformed information,
7+
an attacker can access data that he may not normally have access to.
8+
He/She may even be able to elevate his privileges on the web site if the XML data
9+
is being used for authentication (such as an XML based user file).
10+
</p>
11+
</overview>
12+
<recommendation>
13+
<p>
14+
XPath injection can be prevented using parameterized XPath interface or escaping the user input to make it safe to include in a dynamically constructed query.
15+
If you are using quotes to terminate untrusted input in a dynamically constructed XPath query, then you need to escape that quote in the untrusted input to ensure the untrusted data can’t try to break out of that quoted context.
16+
</p>
17+
<p>
18+
Another better mitigation option is to use a precompiled XPath query. Precompiled XPath queries are already preset before the program executes, rather than created on the fly after the user’s input has been added to the string. This is a better route because you don’t have to worry about missing a character that should have been escaped.
19+
</p>
20+
</recommendation>
21+
<example>
22+
<p>In the example below, the xpath query is controlled by the user and hence leads to a vulnerability.</p>
23+
<sample src="xpathBad.py" />
24+
<p> This can be fixed by using a parameterized query as shown below.</p>
25+
<sample src="xpathGood.py" />
26+
</example>
27+
<references>
28+
<li>OWASP XPath injection : <a href="https://owasp.org/www-community/attacks/XPATH_Injection"></a>/>> </li>
29+
</references>
30+
</qhelp>
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* @name XPath query built from user-controlled sources
3+
* @description Building a XPath query from user-controlled sources is vulnerable to insertion of
4+
* malicious Xpath code by the user.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @precision high
8+
* @id py/xpath-injection
9+
* @tags security
10+
* external/cwe/cwe-643
11+
*/
12+
13+
import python
14+
import semmle.python.security.Paths
15+
/* Sources */
16+
import semmle.python.web.HttpRequest
17+
/* Sinks */
18+
import experimental.semmle.python.security.injection.Xpath
19+
20+
class XpathInjectionConfiguration extends TaintTracking::Configuration {
21+
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
22+
23+
override predicate isSource(TaintTracking::Source source) {
24+
source instanceof HttpRequestTaintSource
25+
}
26+
27+
override predicate isSink(TaintTracking::Sink sink) {
28+
sink instanceof XpathInjection::XpathInjectionSink
29+
}
30+
}
31+
32+
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
33+
where config.hasFlowPath(src, sink)
34+
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
35+
"a user-provided value"
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from lxml import etree
2+
from io import StringIO
3+
4+
from django.urls import path
5+
from django.http import HttpResponse
6+
from django.template import Template, Context, Engine, engines
7+
8+
9+
def a(request):
10+
value = request.GET['xpath']
11+
f = StringIO('<foo><bar></bar></foo>')
12+
tree = etree.parse(f)
13+
r = tree.xpath("/tag[@id='%s']" % value)
14+
15+
16+
urlpatterns = [
17+
path('a', a)
18+
]
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from lxml import etree
2+
from io import StringIO
3+
4+
from django.urls import path
5+
from django.http import HttpResponse
6+
from django.template import Template, Context, Engine, engines
7+
8+
9+
def a(request):
10+
value = request.GET['xpath']
11+
f = StringIO('<foo><bar></bar></foo>')
12+
tree = etree.parse(f)
13+
r = tree.xpath("/tag[@id=$tagid]", tagid=value)
14+
15+
16+
urlpatterns = [
17+
path('a', a)
18+
]
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/**
2+
* Provides class and predicates to track external data that
3+
* may represent malicious xpath query objects.
4+
*
5+
* This module is intended to be imported into a taint-tracking query
6+
* to extend `TaintKind` and `TaintSink`.
7+
*/
8+
9+
import python
10+
import semmle.python.dataflow.TaintTracking
11+
import semmle.python.web.HttpRequest
12+
13+
/** Models Xpath Injection related classes and functions */
14+
module XpathInjection {
15+
/** Returns a class value which refers to `lxml.etree` */
16+
Value etree() { result = Value::named("lxml.etree") }
17+
18+
/** Returns a class value which refers to `lxml.etree` */
19+
Value libxml2parseFile() { result = Value::named("libxml2.parseFile") }
20+
21+
/** A generic taint sink that is vulnerable to Xpath injection. */
22+
abstract class XpathInjectionSink extends TaintSink { }
23+
24+
/**
25+
* A Sink representing an argument to the `etree.Xpath` call.
26+
*
27+
* from lxml import etree
28+
* root = etree.XML("<xmlContent>")
29+
* find_text = etree.XPath("`sink`")
30+
*/
31+
private class EtreeXpathArgument extends XpathInjectionSink {
32+
override string toString() { result = "lxml.etree.Xpath" }
33+
34+
EtreeXpathArgument() {
35+
exists(CallNode call | call.getFunction().(AttrNode).getObject("XPath").pointsTo(etree()) |
36+
call.getArg(0) = this
37+
)
38+
}
39+
40+
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
41+
}
42+
43+
/**
44+
* A Sink representing an argument to the `etree.EtXpath` call.
45+
*
46+
* from lxml import etree
47+
* root = etree.XML("<xmlContent>")
48+
* find_text = etree.EtXPath("`sink`")
49+
*/
50+
private class EtreeETXpathArgument extends XpathInjectionSink {
51+
override string toString() { result = "lxml.etree.ETXpath" }
52+
53+
EtreeETXpathArgument() {
54+
exists(CallNode call | call.getFunction().(AttrNode).getObject("ETXPath").pointsTo(etree()) |
55+
call.getArg(0) = this
56+
)
57+
}
58+
59+
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
60+
}
61+
62+
/**
63+
* A Sink representing an argument to the `xpath` call to a parsed xml document.
64+
*
65+
* from lxml import etree
66+
* from io import StringIO
67+
* f = StringIO('<foo><bar></bar></foo>')
68+
* tree = etree.parse(f)
69+
* r = tree.xpath('`sink`')
70+
*/
71+
private class ParseXpathArgument extends XpathInjectionSink {
72+
override string toString() { result = "lxml.etree.parse.xpath" }
73+
74+
ParseXpathArgument() {
75+
exists(
76+
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
77+
|
78+
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
79+
assign.getValue().(Call).getAFlowNode() = parseCall and
80+
xpathCall.getFunction().(AttrNode).getObject("xpath") = obj and
81+
var.getAUse() = obj and
82+
assign.getATarget() = var.getAStore() and
83+
xpathCall.getArg(0) = this
84+
)
85+
}
86+
87+
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
88+
}
89+
90+
/**
91+
* A Sink representing an argument to the `xpathEval` call to a parsed libxml2 document.
92+
*
93+
* import libxml2
94+
* tree = libxml2.parseFile("file.xml")
95+
* r = tree.xpathEval('`sink`')
96+
*/
97+
private class ParseFileXpathEvalArgument extends XpathInjectionSink {
98+
override string toString() { result = "libxml2.parseFile.xpathEval" }
99+
100+
ParseFileXpathEvalArgument() {
101+
exists(
102+
CallNode parseCall, CallNode xpathCall, ControlFlowNode obj, Variable var, AssignStmt assign
103+
|
104+
parseCall.getFunction().(AttrNode).pointsTo(libxml2parseFile()) and
105+
assign.getValue().(Call).getAFlowNode() = parseCall and
106+
xpathCall.getFunction().(AttrNode).getObject("xpathEval") = obj and
107+
var.getAUse() = obj and
108+
assign.getATarget() = var.getAStore() and
109+
xpathCall.getArg(0) = this
110+
)
111+
}
112+
113+
override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
114+
}
115+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
edges
2+
| xpathBad.py:9:7:9:13 | django.request.HttpRequest | xpathBad.py:10:13:10:19 | django.request.HttpRequest |
3+
| xpathBad.py:9:7:9:13 | django.request.HttpRequest | xpathBad.py:10:13:10:19 | django.request.HttpRequest |
4+
| xpathBad.py:10:13:10:19 | django.request.HttpRequest | xpathBad.py:10:13:10:23 | django.http.request.QueryDict |
5+
| xpathBad.py:10:13:10:19 | django.request.HttpRequest | xpathBad.py:10:13:10:23 | django.http.request.QueryDict |
6+
| xpathBad.py:10:13:10:23 | django.http.request.QueryDict | xpathBad.py:10:13:10:32 | externally controlled string |
7+
| xpathBad.py:10:13:10:23 | django.http.request.QueryDict | xpathBad.py:10:13:10:32 | externally controlled string |
8+
| xpathBad.py:10:13:10:32 | externally controlled string | xpathBad.py:13:39:13:43 | externally controlled string |
9+
| xpathBad.py:10:13:10:32 | externally controlled string | xpathBad.py:13:39:13:43 | externally controlled string |
10+
| xpathBad.py:13:39:13:43 | externally controlled string | xpathBad.py:13:20:13:43 | externally controlled string |
11+
| xpathBad.py:13:39:13:43 | externally controlled string | xpathBad.py:13:20:13:43 | externally controlled string |
12+
| xpathFlow.py:11:18:11:29 | dict of externally controlled string | xpathFlow.py:11:18:11:44 | externally controlled string |
13+
| xpathFlow.py:11:18:11:29 | dict of externally controlled string | xpathFlow.py:11:18:11:44 | externally controlled string |
14+
| xpathFlow.py:11:18:11:44 | externally controlled string | xpathFlow.py:14:20:14:29 | externally controlled string |
15+
| xpathFlow.py:11:18:11:44 | externally controlled string | xpathFlow.py:14:20:14:29 | externally controlled string |
16+
| xpathFlow.py:20:18:20:29 | dict of externally controlled string | xpathFlow.py:20:18:20:44 | externally controlled string |
17+
| xpathFlow.py:20:18:20:29 | dict of externally controlled string | xpathFlow.py:20:18:20:44 | externally controlled string |
18+
| xpathFlow.py:20:18:20:44 | externally controlled string | xpathFlow.py:23:29:23:38 | externally controlled string |
19+
| xpathFlow.py:20:18:20:44 | externally controlled string | xpathFlow.py:23:29:23:38 | externally controlled string |
20+
| xpathFlow.py:30:18:30:29 | dict of externally controlled string | xpathFlow.py:30:18:30:44 | externally controlled string |
21+
| xpathFlow.py:30:18:30:29 | dict of externally controlled string | xpathFlow.py:30:18:30:44 | externally controlled string |
22+
| xpathFlow.py:30:18:30:44 | externally controlled string | xpathFlow.py:32:29:32:38 | externally controlled string |
23+
| xpathFlow.py:30:18:30:44 | externally controlled string | xpathFlow.py:32:29:32:38 | externally controlled string |
24+
| xpathFlow.py:39:18:39:29 | dict of externally controlled string | xpathFlow.py:39:18:39:44 | externally controlled string |
25+
| xpathFlow.py:39:18:39:29 | dict of externally controlled string | xpathFlow.py:39:18:39:44 | externally controlled string |
26+
| xpathFlow.py:39:18:39:44 | externally controlled string | xpathFlow.py:41:31:41:40 | externally controlled string |
27+
| xpathFlow.py:39:18:39:44 | externally controlled string | xpathFlow.py:41:31:41:40 | externally controlled string |
28+
| xpathFlow.py:47:18:47:29 | dict of externally controlled string | xpathFlow.py:47:18:47:44 | externally controlled string |
29+
| xpathFlow.py:47:18:47:29 | dict of externally controlled string | xpathFlow.py:47:18:47:44 | externally controlled string |
30+
| xpathFlow.py:47:18:47:44 | externally controlled string | xpathFlow.py:49:29:49:38 | externally controlled string |
31+
| xpathFlow.py:47:18:47:44 | externally controlled string | xpathFlow.py:49:29:49:38 | externally controlled string |
32+
#select
33+
| xpathBad.py:13:20:13:43 | BinaryExpr | xpathBad.py:9:7:9:13 | django.request.HttpRequest | xpathBad.py:13:20:13:43 | externally controlled string | This Xpath query depends on $@. | xpathBad.py:9:7:9:13 | request | a user-provided value |
34+
| xpathFlow.py:14:20:14:29 | xpathQuery | xpathFlow.py:11:18:11:29 | dict of externally controlled string | xpathFlow.py:14:20:14:29 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:11:18:11:29 | Attribute | a user-provided value |
35+
| xpathFlow.py:23:29:23:38 | xpathQuery | xpathFlow.py:20:18:20:29 | dict of externally controlled string | xpathFlow.py:23:29:23:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:20:18:20:29 | Attribute | a user-provided value |
36+
| xpathFlow.py:32:29:32:38 | xpathQuery | xpathFlow.py:30:18:30:29 | dict of externally controlled string | xpathFlow.py:32:29:32:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:30:18:30:29 | Attribute | a user-provided value |
37+
| xpathFlow.py:41:31:41:40 | xpathQuery | xpathFlow.py:39:18:39:29 | dict of externally controlled string | xpathFlow.py:41:31:41:40 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:39:18:39:29 | Attribute | a user-provided value |
38+
| xpathFlow.py:49:29:49:38 | xpathQuery | xpathFlow.py:47:18:47:29 | dict of externally controlled string | xpathFlow.py:49:29:49:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:47:18:47:29 | Attribute | a user-provided value |
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from lxml import etree
2+
from io import StringIO
3+
4+
5+
def a():
6+
f = StringIO('<foo><bar></bar></foo>')
7+
tree = etree.parse(f)
8+
r = tree.xpath('/foo/bar')
9+
10+
11+
def b():
12+
root = etree.XML("<root><a>TEXT</a></root>")
13+
find_text = etree.XPath("//text()")
14+
text = find_text(root)[0]
15+
16+
17+
def c():
18+
root = etree.XML("<root><a>TEXT</a></root>")
19+
find_text = etree.XPath("//text()", smart_strings=False)
20+
text = find_text(root)[0]
21+
22+
23+
def d():
24+
root = etree.XML("<root><a>TEXT</a></root>")
25+
find_text = find = etree.ETXPath("//{ns}b")
26+
text = find_text(root)[0]
27+
28+
29+
def e():
30+
import libxml2
31+
doc = libxml2.parseFile('xpath_injection/credential.xml')
32+
results = doc.xpathEval('sink')
33+
34+
35+
if __name__ == "__main__":
36+
a()
37+
b()
38+
c()
39+
d()
40+
e()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/CWE-643/xpath.ql
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from lxml import etree
2+
from io import StringIO
3+
4+
from django.urls import path
5+
from django.http import HttpResponse
6+
from django.template import Template, Context, Engine, engines
7+
8+
9+
def a(request):
10+
value = request.GET['xpath']
11+
f = StringIO('<foo><bar></bar></foo>')
12+
tree = etree.parse(f)
13+
r = tree.xpath("/tag[@id='%s']" % value)
14+
15+
16+
urlpatterns = [
17+
path('a', a)
18+
]

0 commit comments

Comments
 (0)