Skip to content

Commit 8c5a971

Browse files
author
Porcupiney Hairs
committed
Python : Add Xpath injection query
This PR adds support for detecting XPATH injection in Python. I have included the ql files as well as the tests with this.
1 parent 593d4c0 commit 8c5a971

File tree

14 files changed

+324
-0
lines changed

14 files changed

+324
-0
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from lxml import etree
2+
from io import StringIO
3+
4+
from django.urls import path
5+
from django.http import HttpResponse
6+
from django.template import Template, Context, Engine, engines
7+
8+
9+
def a(request):
    """Evaluate a user-supplied XPath expression against a fixed XML document.

    This view is deliberately vulnerable: the ``xpath`` query-string
    parameter is passed unmodified to ``tree.xpath``, so the caller controls
    the entire query (XPath injection).

    Args:
        request: The incoming Django request; ``request.GET['xpath']`` is read.

    Returns:
        An ``HttpResponse`` containing the string form of the query result.
    """
    # Source: attacker-controlled value taken straight from the query string.
    xpath_query = request.GET['xpath']
    f = StringIO('<foo><bar></bar></foo>')
    tree = etree.parse(f)
    # Sink: the untrusted expression is evaluated as-is.
    r = tree.xpath(xpath_query)
    # A Django view has to return an HttpResponse (returning None makes Django
    # raise ValueError). Serve the result as plain text so that a string
    # result chosen by the caller is not interpreted as HTML.
    return HttpResponse(str(r), content_type="text/plain")
14+
15+
16+
# URL configuration: route the path 'a' to the view function `a`.
urlpatterns = [
17+
path('a', a)
18+
]
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
2+
<qhelp>
3+
<overview>
4+
<p>
Using user-supplied information to construct an XPath query for XML data can
5+
result in an XPath injection flaw. By sending intentionally malformed information,
6+
an attacker can access data that they would not normally have access to.
7+
They may even be able to elevate their privileges on the web site if the XML data
8+
is being used for authentication (such as an XML-based user file).
</p>
9+
</overview>
10+
<recommendation>
11+
<p>
12+
XPath injection can be prevented by using a parameterized XPath interface or by escaping the user input to make it safe to include in a dynamically constructed query.
13+
If you are using quotes to terminate untrusted input in a dynamically constructed XPath query, then you need to escape those quotes in the untrusted input so that the untrusted data cannot break out of the quoted context.
14+
</p>
15+
<p>
16+
Another, better mitigation is to use a precompiled XPath query. Precompiled XPath queries are fixed before the program executes, rather than being created on the fly after the user’s input has been added to the string. This is a better route because you don’t have to worry about missing a character that should have been escaped.
17+
</p>
18+
</recommendation>
<example>
19+
20+
<p>In the example below, the XPath query is controlled by the user, which leads to a vulnerability.</p>
21+
22+
<sample src="xpath.py" />
23+
</example>
24+
<references>
25+
<li>OWASP: <a href="https://owasp.org/www-community/attacks/XPATH_Injection">XPath Injection</a>.</li>
26+
</references>
27+
28+
29+
30+
31+
32+
</qhelp>
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* @name XPath query built from user-controlled sources
3+
* @description Building an XPath query from user-controlled sources is vulnerable to insertion of
4+
* malicious XPath code by the user.
5+
* @kind path-problem
6+
* @problem.severity error
7+
* @precision high
8+
* @id py/xpath-injection
9+
* @tags security
10+
* external/cwe/cwe-643
11+
*/
12+
13+
import python
14+
import semmle.python.security.Paths
15+
/* Sources */
16+
import semmle.python.web.HttpRequest
17+
/* Sinks */
18+
import experimental.semmle.python.security.injection.Xpath
19+
20+
/**
 * A taint-tracking configuration for XPath injection.
 *
 * Sources are `HttpRequestTaintSource`s and sinks are
 * `XpathInjection::XpathInjectionSink`s.
 */
class XpathInjectionConfiguration extends TaintTracking::Configuration {
21+
XpathInjectionConfiguration() { this = "Xpath injection configuration" }
22+
23+
override predicate isSource(TaintTracking::Source source) {
24+
source instanceof HttpRequestTaintSource
25+
}
26+
27+
override predicate isSink(TaintTracking::Sink sink) {
28+
sink instanceof XpathInjection::XpathInjectionSink
29+
}
30+
}
31+
32+
// Report each flow path that the configuration finds from a source to a sink.
from XpathInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
33+
where config.hasFlowPath(src, sink)
34+
select sink.getSink(), src, sink, "This Xpath query depends on $@.", src.getSource(),
35+
"a user-provided value"
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/**
2+
* Provides classes and predicates to track external data that
3+
* may represent malicious XPath query objects.
4+
*
5+
* This module is intended to be imported into a taint-tracking query
6+
* to extend `TaintKind` and `TaintSink`.
7+
*/
8+
9+
import python
10+
import semmle.python.security.TaintTracking
11+
import semmle.python.web.HttpRequest
12+
13+
/** Models XPath injection related classes and functions. */
module XpathInjection {
  /** Gets the value that refers to the `lxml.etree` module. */
  Value etree() { result = Value::named("lxml.etree") }

  /** A generic taint sink that is vulnerable to XPath injection. */
  abstract class XpathInjectionSink extends TaintSink { }

  /**
   * A sink representing the first argument of a call to `etree.XPath`.
   *
   *    from lxml import etree
   *    root = etree.XML("<xmlContent>")
   *    find_text = etree.XPath("`sink`")
   */
  private class EtreeXpathArgument extends XpathInjectionSink {
    // The literal is kept as-is: the test `.expected` output depends on it.
    override string toString() { result = "lxml.etree.Xpath" }

    EtreeXpathArgument() {
      exists(CallNode call, AttrNode atr |
        atr = etree().getAReference().getASuccessor() and
        atr.getName() = "XPath" and
        atr = call.getFunction()
      |
        call.getArg(0) = this
      )
    }

    override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
  }

  /**
   * A sink representing the first argument of a call to `etree.ETXPath`.
   *
   *    from lxml import etree
   *    root = etree.XML("<xmlContent>")
   *    find_text = etree.ETXPath("`sink`")
   */
  private class EtreeETXpathArgument extends XpathInjectionSink {
    // The literal is kept as-is: the test `.expected` output depends on it.
    override string toString() { result = "lxml.etree.ETXpath" }

    EtreeETXpathArgument() {
      exists(CallNode call, AttrNode atr |
        atr = etree().getAReference().getASuccessor() and
        atr.getName() = "ETXPath" and
        atr = call.getFunction()
      |
        call.getArg(0) = this
      )
    }

    override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
  }

  /**
   * A sink representing the first argument of an `xpath` call on a parsed XML document.
   *
   *    from lxml import etree
   *    from io import StringIO
   *    f = StringIO('<foo><bar></bar></foo>')
   *    tree = etree.parse(f)
   *    r = tree.xpath('`sink`')
   */
  private class ParseXpathArgument extends XpathInjectionSink {
    // The literal is kept as-is: the test `.expected` output depends on it.
    override string toString() { result = "lxml.etree.parse.xpath" }

    ParseXpathArgument() {
      exists(CallNode parseCall, AttrNode parse, CallNode xpathCall, AttrNode xpath |
        parse = etree().getAReference().getASuccessor() and
        parse.getName() = "parse" and
        parse = parseCall.getFunction() and
        // NOTE(review): this relates the two calls by control-flow reachability
        // (`getASuccessor*`), not by data flow; the receiver of `.xpath(...)` is
        // not checked to be the tree returned by `etree.parse`.
        xpath = parseCall.getASuccessor*() and
        xpath.getName() = "xpath" and
        xpath = xpathCall.getFunction() and
        this = xpathCall.getArg(0)
      )
    }

    override predicate sinks(TaintKind kind) { kind instanceof ExternalStringKind }
  }
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=3 -p ../../../query-tests/Security/lib/
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from lxml import etree
2+
from io import StringIO
3+
4+
5+
def a():  # case: `.xpath()` called on the tree returned by `etree.parse`
6+
f = StringIO('<foo><bar></bar></foo>')
7+
tree = etree.parse(f)
8+
r = tree.xpath('/foo/bar')  # expected sink: lxml.etree.parse.xpath
9+
10+
11+
def b():  # case: `etree.XPath` built from a single positional expression
12+
root = etree.XML("<root><a>TEXT</a></root>")
13+
find_text = etree.XPath("//text()")  # expected sink: lxml.etree.Xpath
14+
text = find_text(root)[0]
15+
16+
17+
def c():  # case: `etree.XPath` with an additional keyword argument
18+
root = etree.XML("<root><a>TEXT</a></root>")
19+
find_text = etree.XPath("//text()", smart_strings=False)  # expected sink: lxml.etree.Xpath
20+
text = find_text(root)[0]
21+
22+
23+
def d():  # case: `etree.ETXPath`
24+
root = etree.XML("<root><a>TEXT</a></root>")
25+
find_text = find = etree.ETXPath("//{ns}b")  # expected sink: lxml.etree.ETXpath; the expected results pin this argument's column, so keep the line layout
26+
text = find_text(root)[0]
27+
28+
29+
if __name__ == "__main__":  # call each sample function when run as a script
30+
a()
31+
b()
32+
c()
33+
d()
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
| xpath.py:8:20:8:29 | lxml.etree.parse.xpath | externally controlled string |
2+
| xpath.py:13:29:13:38 | lxml.etree.Xpath | externally controlled string |
3+
| xpath.py:19:29:19:38 | lxml.etree.Xpath | externally controlled string |
4+
| xpath.py:25:38:25:46 | lxml.etree.ETXpath | externally controlled string |
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import python
2+
import experimental.semmle.python.security.injection.Xpath
3+
4+
// List every `XpathInjectionSink` together with each taint kind it accepts.
from XpathInjection::XpathInjectionSink sink, TaintKind kind
5+
where sink.sinks(kind)
6+
select sink, kind
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
semmle-extractor-options: --max-import-depth=3 -p ../../query-tests/Security/lib/
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
edges
2+
| xpathFlow.py:10:18:10:29 | dict of externally controlled string | xpathFlow.py:10:18:10:44 | externally controlled string |
3+
| xpathFlow.py:10:18:10:29 | dict of externally controlled string | xpathFlow.py:10:18:10:44 | externally controlled string |
4+
| xpathFlow.py:10:18:10:44 | externally controlled string | xpathFlow.py:13:20:13:29 | externally controlled string |
5+
| xpathFlow.py:10:18:10:44 | externally controlled string | xpathFlow.py:13:20:13:29 | externally controlled string |
6+
| xpathFlow.py:18:18:18:29 | dict of externally controlled string | xpathFlow.py:18:18:18:44 | externally controlled string |
7+
| xpathFlow.py:18:18:18:29 | dict of externally controlled string | xpathFlow.py:18:18:18:44 | externally controlled string |
8+
| xpathFlow.py:18:18:18:44 | externally controlled string | xpathFlow.py:21:29:21:38 | externally controlled string |
9+
| xpathFlow.py:18:18:18:44 | externally controlled string | xpathFlow.py:21:29:21:38 | externally controlled string |
10+
| xpathFlow.py:27:18:27:29 | dict of externally controlled string | xpathFlow.py:27:18:27:44 | externally controlled string |
11+
| xpathFlow.py:27:18:27:29 | dict of externally controlled string | xpathFlow.py:27:18:27:44 | externally controlled string |
12+
| xpathFlow.py:27:18:27:44 | externally controlled string | xpathFlow.py:29:29:29:38 | externally controlled string |
13+
| xpathFlow.py:27:18:27:44 | externally controlled string | xpathFlow.py:29:29:29:38 | externally controlled string |
14+
| xpathFlow.py:35:18:35:29 | dict of externally controlled string | xpathFlow.py:35:18:35:44 | externally controlled string |
15+
| xpathFlow.py:35:18:35:29 | dict of externally controlled string | xpathFlow.py:35:18:35:44 | externally controlled string |
16+
| xpathFlow.py:35:18:35:44 | externally controlled string | xpathFlow.py:37:38:37:47 | externally controlled string |
17+
| xpathFlow.py:35:18:35:44 | externally controlled string | xpathFlow.py:37:38:37:47 | externally controlled string |
18+
#select
19+
| xpathFlow.py:13:20:13:29 | xpathQuery | xpathFlow.py:10:18:10:29 | dict of externally controlled string | xpathFlow.py:13:20:13:29 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:10:18:10:29 | Attribute | a user-provided value |
20+
| xpathFlow.py:21:29:21:38 | xpathQuery | xpathFlow.py:18:18:18:29 | dict of externally controlled string | xpathFlow.py:21:29:21:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:18:18:18:29 | Attribute | a user-provided value |
21+
| xpathFlow.py:29:29:29:38 | xpathQuery | xpathFlow.py:27:18:27:29 | dict of externally controlled string | xpathFlow.py:29:29:29:38 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:27:18:27:29 | Attribute | a user-provided value |
22+
| xpathFlow.py:37:38:37:47 | xpathQuery | xpathFlow.py:35:18:35:29 | dict of externally controlled string | xpathFlow.py:37:38:37:47 | externally controlled string | This Xpath query depends on $@. | xpathFlow.py:35:18:35:29 | Attribute | a user-provided value |

0 commit comments

Comments
 (0)