Skip to content

Commit 9d542f1

Browse files
authored
Merge pull request #10887 from Sim4n6/TarSlipImprov
Python: Add TarSlip Improv query
2 parents b9f4856 + 92a3846 commit 9d542f1

File tree

5 files changed

+642
-0
lines changed

5 files changed

+642
-0
lines changed
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Extracting files from a malicious tarball without validating that the destination file path
8+
is within the destination directory can cause files outside the destination directory to be
9+
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
10+
archive path names.</p>
11+
12+
<p>Tarball contain archive entries representing each file in the archive. These entries
13+
include a file path for the entry, but these file paths are not restricted and may contain
14+
unexpected special elements such as the directory traversal element (<code>..</code>). If these
15+
file paths are used to determine an output file to write the contents of the archive item to, then
16+
the file may be written to an unexpected location. This can result in sensitive information being
17+
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
18+
files.</p>
19+
20+
<p>For example, if a tarball contains a file entry <code>../sneaky-file</code>, and the tarball
21+
is extracted to the directory <code>/tmp/tmp123</code>, then naively combining the paths would result
22+
in an output file path of <code>/tmp/tmp123/../sneaky-file</code>, which would cause the file to be
23+
written to <code>/tmp/</code>.</p>
24+
25+
</overview>
26+
<recommendation>
27+
28+
<p>Ensure that output paths constructed from tarball entries are validated
29+
to prevent writing files to unexpected locations.</p>
30+
31+
<p>The recommended way of writing an output file from a tarball entry is to call <code>extract()</code> or <code>extractall()</code>.
32+
</p>
33+
34+
</recommendation>
35+
36+
<example>
37+
<p>
38+
In this example an archive is extracted without validating file paths.
39+
</p>
40+
41+
<sample src="examples/TarSlip_1.py" />
42+
43+
<p>To fix this vulnerability, we need to call the function <code>extractall()</code>.
44+
</p>
45+
46+
<sample src="examples/NoHIT_TarSlip_1.py" />
47+
48+
</example>
49+
<references>
50+
<li>
51+
Snyk:
52+
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
53+
</li>
54+
55+
<li>
56+
Tarfile documentation
57+
<a href="https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall">extractall() warning</a>
58+
</li>
59+
</references>
60+
</qhelp>
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/**
2+
* @name Arbitrary file write during tarfile extraction
3+
* @description Extracting files from a malicious tar archive without validating that the
4+
* destination file path is within the destination directory can cause files outside
5+
* the destination directory to be overwritten.
6+
* @kind path-problem
7+
* @id py/tarslip
8+
* @problem.severity error
9+
* @security-severity 7.5
10+
* @precision high
11+
* @tags security
12+
* external/cwe/cwe-022
13+
*/
14+
15+
import python
16+
import semmle.python.dataflow.new.DataFlow
17+
import semmle.python.dataflow.new.TaintTracking
18+
import DataFlow::PathGraph
19+
import semmle.python.ApiGraphs
20+
import semmle.python.dataflow.new.internal.Attributes
21+
import semmle.python.dataflow.new.BarrierGuards
22+
import semmle.python.dataflow.new.RemoteFlowSources
23+
24+
/**
25+
* Handle those three cases of Tarfile opens:
26+
* - `tarfile.open()`
27+
* - `tarfile.TarFile()`
28+
* - `MKtarfile.Tarfile.open()`
29+
*/
30+
API::Node tarfileOpen() {
31+
result in [
32+
API::moduleImport("tarfile").getMember(["open", "TarFile"]),
33+
API::moduleImport("tarfile").getMember("TarFile").getASubclass().getMember("open")
34+
]
35+
}
36+
37+
/**
38+
* Handle the previous three cases, plus the use of `closing` in the previous cases
39+
*/
40+
class AllTarfileOpens extends API::CallNode {
41+
AllTarfileOpens() {
42+
this = tarfileOpen().getACall()
43+
or
44+
exists(API::Node closing, Node arg |
45+
closing = API::moduleImport("contextlib").getMember("closing") and
46+
this = closing.getACall() and
47+
arg = this.getArg(0) and
48+
arg = tarfileOpen().getACall()
49+
)
50+
}
51+
}
52+
53+
/**
54+
* A taint-tracking configuration for detecting more "TarSlip" vulnerabilities.
55+
*/
56+
class Configuration extends TaintTracking::Configuration {
57+
Configuration() { this = "TarSlip" }
58+
59+
override predicate isSource(DataFlow::Node source) { source = tarfileOpen().getACall() }
60+
61+
override predicate isSink(DataFlow::Node sink) {
62+
(
63+
// A sink capturing method calls to `extractall` without `members` argument.
64+
// For a call to `file.extractall` without `members` argument, `file` is considered a sink.
65+
exists(MethodCallNode call, AllTarfileOpens atfo |
66+
call = atfo.getReturn().getMember("extractall").getACall() and
67+
not exists(Node arg | arg = call.getArgByName("members")) and
68+
sink = call.getObject()
69+
)
70+
or
71+
// A sink capturing method calls to `extractall` with `members` argument.
72+
// For a call to `file.extractall` with `members` argument, `file` is considered a sink if not
73+
// a the `members` argument contains a NameConstant as None, a List or call to the method `getmembers`.
74+
// Otherwise, the argument of `members` is considered a sink.
75+
exists(MethodCallNode call, Node arg, AllTarfileOpens atfo |
76+
call = atfo.getReturn().getMember("extractall").getACall() and
77+
arg = call.getArgByName("members") and
78+
if
79+
arg.asCfgNode() instanceof NameConstantNode or
80+
arg.asCfgNode() instanceof ListNode
81+
then sink = call.getObject()
82+
else
83+
if arg.(MethodCallNode).getMethodName() = "getmembers"
84+
then sink = arg.(MethodCallNode).getObject()
85+
else sink = call.getArgByName("members")
86+
)
87+
or
88+
// An argument to `extract` is considered a sink.
89+
exists(AllTarfileOpens atfo |
90+
sink = atfo.getReturn().getMember("extract").getACall().getArg(0)
91+
)
92+
or
93+
//An argument to `_extract_member` is considered a sink.
94+
exists(MethodCallNode call, AllTarfileOpens atfo |
95+
call = atfo.getReturn().getMember("_extract_member").getACall() and
96+
call.getArg(1).(AttrRead).accesses(sink, "name")
97+
)
98+
) and
99+
not sink.getScope().getLocation().getFile().inStdlib()
100+
}
101+
102+
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
103+
exists(AttrRead attr, MethodCallNode call |
104+
attr.accesses(nodeFrom, "getmembers") and
105+
nodeFrom = call.getObject() and
106+
nodeFrom instanceof AllTarfileOpens and
107+
nodeTo = call
108+
)
109+
or
110+
exists(API::CallNode closing |
111+
closing = API::moduleImport("contextlib").getMember("closing").getACall() and
112+
nodeFrom = closing.getArg(0) and
113+
nodeFrom = tarfileOpen().getReturn().getAValueReachingSink() and
114+
nodeTo = closing
115+
)
116+
}
117+
}
118+
119+
from Configuration config, DataFlow::PathNode source, DataFlow::PathNode sink
120+
where config.hasFlowPath(source, sink)
121+
select sink, source, sink, "Extraction of tarfile from $@ to a potentially untrusted source $@.",
122+
source.getNode(), source.getNode().toString(), sink.getNode(), sink.getNode().toString()

0 commit comments

Comments
 (0)