Skip to content

Commit d2ab40c

Browse files
authored
Merge pull request github#12208 from gregxsunday/main
Add ZipSlip and TarSlip query to ruby
2 parents b8e123d + 48007d1 commit d2ab40c

File tree

10 files changed

+513
-0
lines changed

10 files changed

+513
-0
lines changed
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
/**
2+
* Provides default sources, sinks and sanitizers for reasoning about
3+
* zip slip vulnerabilities, as well as extension points for
4+
* adding your own.
5+
*/
6+
7+
private import codeql.ruby.AST
8+
private import codeql.ruby.ApiGraphs
9+
private import codeql.ruby.CFG
10+
private import codeql.ruby.Concepts
11+
private import codeql.ruby.DataFlow
12+
private import codeql.ruby.dataflow.BarrierGuards
13+
private import codeql.ruby.dataflow.RemoteFlowSources
14+
15+
/**
16+
* Provides default sources, sinks and sanitizers for reasoning about
17+
* zip slip vulnerabilities, as well as extension points for
18+
* adding your own.
19+
*/
20+
module ZipSlip {
21+
/**
22+
* A data flow source for zip slip vulnerabilities.
23+
*/
24+
abstract class Source extends DataFlow::Node { }
25+
26+
/**
27+
* A data flow sink for zip slip vulnerabilities.
28+
*/
29+
abstract class Sink extends DataFlow::Node { }
30+
31+
/**
32+
* A sanitizer for zip slip vulnerabilities.
33+
*/
34+
abstract class Sanitizer extends DataFlow::Node { }
35+
36+
/**
37+
* A file system access, considered as a flow sink.
38+
*/
39+
class FileSystemAccessAsSink extends Sink {
40+
FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
41+
}
42+
43+
/**
44+
* A call to `Zlib::GzipReader.open(path)`, considered a flow source.
45+
*/
46+
private class GzipReaderOpen extends Source {
47+
GzipReaderOpen() {
48+
(
49+
this = API::getTopLevelMember("Zlib").getMember("GzipReader").getReturn("open").asSource()
50+
or
51+
this = API::getTopLevelMember("Zlib").getMember("GzipReader").getInstance().asSource()
52+
) and
53+
// If argument refers to a string object, then it's a hardcoded path and
54+
// this file is safe.
55+
not this.(DataFlow::CallNode)
56+
.getArgument(0)
57+
.getALocalSource()
58+
.getConstantValue()
59+
.isStringlikeValue(_)
60+
}
61+
}
62+
63+
/**
64+
* A call to `Gem::Package::TarReader.new(file_stream)`, considered a flow source.
65+
*/
66+
private class TarReaderInstance extends Source {
67+
TarReaderInstance() {
68+
exists(API::MethodAccessNode newTarReader |
69+
newTarReader =
70+
API::getTopLevelMember("Gem").getMember("Package").getMember("TarReader").getMethod("new")
71+
|
72+
// Unlike in two other modules, there's no check for the constant path because TarReader class is called with an `io` object and not filepath.
73+
// This, of course, can be modeled but probably in the internal IO.qll file
74+
// For now, I'm leaving this prone to false-positives
75+
not exists(newTarReader.getBlock()) and this = newTarReader.getReturn().asSource()
76+
or
77+
this = newTarReader.getBlock().getParameter(0).asSource()
78+
)
79+
}
80+
}
81+
82+
/**
83+
* A call to `Zip::File.open(path)`, considered a flow source.
84+
*/
85+
private class ZipFileOpen extends Source {
86+
ZipFileOpen() {
87+
exists(API::MethodAccessNode zipOpen |
88+
zipOpen = API::getTopLevelMember("Zip").getMember("File").getMethod("open") and
89+
// If argument refers to a string object, then it's a hardcoded path and
90+
// this file is safe.
91+
not zipOpen
92+
.getCallNode()
93+
.getArgument(0)
94+
.getALocalSource()
95+
.getConstantValue()
96+
.isStringlikeValue(_)
97+
|
98+
// the case with variable assignment `zip_file = Zip::File.open(path)`
99+
not exists(zipOpen.getBlock()) and this = zipOpen.getReturn().asSource()
100+
or
101+
// the case with direct block`Zip::File.open(path) do |zip_file|` case
102+
this = zipOpen.getBlock().getParameter(0).asSource()
103+
)
104+
}
105+
}
106+
107+
/**
108+
* A comparison with a constant string, considered as a sanitizer-guard.
109+
*/
110+
private class StringConstCompareAsSanitizer extends Sanitizer, StringConstCompareBarrier { }
111+
112+
/**
113+
* An inclusion check against an array of constant strings, considered as a
114+
* sanitizer-guard.
115+
*/
116+
private class StringConstArrayInclusionCallAsSanitizer extends Sanitizer,
117+
StringConstArrayInclusionCallBarrier { }
118+
119+
/**
120+
* A sanitizer like `File.expand_path(path).start_with?` where `path` is a path of a single entry inside the archive.
121+
* It is assumed that if `File.expand_path` is called, it is to verify the path is safe so there's no modeling of `start_with?` or other comparisons to avoid false-negatives.
122+
*/
123+
private class ExpandedPathStartsWithAsSanitizer extends Sanitizer {
124+
ExpandedPathStartsWithAsSanitizer() {
125+
exists(DataFlow::CallNode cn |
126+
cn.getMethodName() = "expand_path" and
127+
this = cn.getArgument(0)
128+
)
129+
}
130+
}
131+
132+
/**
133+
* Existing PathSanitization model created for regular path traversals
134+
*/
135+
private class PathSanitizationAsSanitizer extends Sanitizer instanceof Path::PathSanitization { }
136+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Provides a taint tracking configuration for reasoning about
3+
* zip slip vulnerabilities.
4+
*/
5+
6+
import ZipSlipCustomizations
7+
private import codeql.ruby.Concepts
8+
private import codeql.ruby.DataFlow
9+
private import codeql.ruby.TaintTracking
10+
private import codeql.ruby.ApiGraphs
11+
12+
/**
13+
* A taint-tracking configuration for reasoning about zip slip
14+
* vulnerabilities.
15+
*/
16+
class Configuration extends TaintTracking::Configuration {
17+
Configuration() { this = "ZipSlip" }
18+
19+
override predicate isSource(DataFlow::Node source) { source instanceof ZipSlip::Source }
20+
21+
override predicate isSink(DataFlow::Node sink) { sink instanceof ZipSlip::Sink }
22+
23+
/**
24+
* This should actually be
25+
* `and cn = API::getTopLevelMember("Gem").getMember("Package").getMember("TarReader").getMember("Entry").getAMethodCall("full_name")` and similar for other classes
26+
* but I couldn't make it work so there's only check for the method name called on the entry. It is `full_name` for `Gem::Package::TarReader::Entry` and `Zlib`
27+
* and `name` for `Zip::File`
28+
*/
29+
override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
30+
exists(DataFlow::CallNode cn |
31+
cn.getReceiver() = nodeFrom and
32+
cn.getMethodName() in ["full_name", "name"] and
33+
cn = nodeTo
34+
)
35+
}
36+
37+
override predicate isSanitizer(DataFlow::Node node) { node instanceof ZipSlip::Sanitizer }
38+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: newQuery
3+
---
4+
* Added a new query, `rb/zip-slip`, to detect arbitrary file writes during extraction of zip/tar archives.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>Extracting files from a malicious tar archive without validating that the destination file path
8+
is within the destination directory can cause files outside the destination directory to be
9+
overwritten, due to the possible presence of directory traversal elements (<code>..</code>) in
10+
archive paths.</p>
11+
12+
<p>Tar archives contain archive entries representing each file in the archive. These entries
13+
include a file path for the entry, but these file paths are not restricted and may contain
14+
unexpected special elements such as the directory traversal element (<code>..</code>). If these
15+
file paths are used to determine an output file to write the contents of the archive item to, then
16+
the file may be written to an unexpected location. This can result in sensitive information being
17+
revealed or deleted, or an attacker being able to influence behavior by modifying unexpected
18+
files.</p>
19+
20+
<p>For example, if a tar archive contains a file entry <code>..\sneaky-file</code>, and the tar archive
21+
is extracted to the directory <code>c:\output</code>, then naively combining the paths would result
22+
in an output file path of <code>c:\output\..\sneaky-file</code>, which would cause the file to be
23+
written to <code>c:\sneaky-file</code>.</p>
24+
25+
</overview>
26+
<recommendation>
27+
28+
<p>Ensure that output paths constructed from tar archive entries are validated
29+
to prevent writing files to unexpected locations.</p>
30+
31+
<p>The recommended way of writing an output file from a tar archive entry is to check that
32+
<code>".."</code> does not occur in the path.
33+
</p>
34+
35+
</recommendation>
36+
37+
<example>
38+
<p>
39+
In this example an archive is extracted without validating file paths.
40+
If <code>archive.tar</code> contained relative paths (for
41+
instance, if it were created by something like <code>tar -cf archive.tar
42+
../file.txt</code>) then executing this code could write to locations
43+
outside the destination directory.
44+
</p>
45+
46+
<sample src="examples/zip_slip_bad.rb" />
47+
48+
<p>To fix this vulnerability, we need to check that the path does not
49+
contain any <code>".."</code> elements in it.
50+
</p>
51+
52+
<sample src="examples/zip_slip_good.rb" />
53+
54+
</example>
55+
<references>
56+
57+
<li>
58+
Snyk:
59+
<a href="https://snyk.io/research/zip-slip-vulnerability">Zip Slip Vulnerability</a>.
60+
</li>
61+
<li>
62+
OWASP:
63+
<a href="https://owasp.org/www-community/attacks/Path_Traversal">Path Traversal</a>.
64+
</li>
65+
<li>
66+
class
67+
<a href="https://docs.ruby-lang.org/en/2.4.0/Gem/Package/TarReader.html">Gem::Package::TarReader</a>.
68+
</li>
69+
<li>
70+
class
71+
<a href="https://ruby-doc.org/stdlib-2.4.0/libdoc/zlib/rdoc/Zlib/GzipReader.html">Zlib::GzipReader</a>.
72+
</li>
73+
<li>
74+
class
75+
<a href="https://www.rubydoc.info/github/rubyzip/rubyzip/Zip/File">Zip::File</a>.
76+
</li>
77+
78+
</references>
79+
</qhelp>
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/**
2+
* @name Arbitrary file write during zipfile/tarfile extraction
3+
* @description Extracting files from a malicious tar archive without validating that the
4+
* destination file path is within the destination directory can cause files outside
5+
* the destination directory to be overwritten.
6+
* @kind path-problem
7+
* @id rb/zip-slip
8+
* @problem.severity error
9+
* @security-severity 7.5
10+
* @precision medium
11+
* @tags security
12+
* external/cwe/cwe-022
13+
*/
14+
15+
import ruby
16+
import codeql.ruby.experimental.ZipSlipQuery
17+
import DataFlow::PathGraph
18+
19+
from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink
20+
where cfg.hasFlowPath(source, sink)
21+
select sink.getNode(), source, sink, "This file extraction depends on a $@.", source.getNode(),
22+
"potentially untrusted source"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
class FilesController < ActionController::Base
2+
def zipFileUnsafe
3+
path = params[:path]
4+
Zip::File.open(path).each do |entry|
5+
File.open(entry.name, "wb") do |os|
6+
entry.read
7+
end
8+
end
9+
end
10+
11+
def tarReaderUnsafe
12+
path = params[:path]
13+
file_stream = IO.new(IO.sysopen(path))
14+
tarfile = Gem::Package::TarReader.new(file_stream)
15+
tarfile.each do |entry|
16+
::File.open(entry.full_name, "wb") do |os|
17+
entry.read
18+
end
19+
end
20+
end
21+
end
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
class FilesController < ActionController::Base
2+
def zipFileSafe
3+
path = params[:path]
4+
Zip::File.open(path).each do |entry|
5+
entry_path = entry.name
6+
next if !File.expand_path(entry_path).start_with?('/safepath/')
7+
File.open(entry_path, "wb") do |os|
8+
entry.read
9+
end
10+
end
11+
end
12+
13+
def tarReaderSafe
14+
path = params[:path]
15+
file_stream = IO.new(IO.sysopen(path))
16+
tarfile = Gem::Package::TarReader.new(file_stream)
17+
tarfile.each do |entry|
18+
entry_path = entry.full_name
19+
raise ExtractFailed if entry_path != "/safepath"
20+
::File.open(entry_path, "wb") do |os|
21+
entry.read
22+
end
23+
end
24+
end
25+
end
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
edges
2+
| zip_slip.rb:8:15:8:54 | call to new : | zip_slip.rb:9:5:9:11 | tarfile : |
3+
| zip_slip.rb:9:5:9:11 | tarfile : | zip_slip.rb:9:22:9:26 | entry : |
4+
| zip_slip.rb:9:22:9:26 | entry : | zip_slip.rb:10:19:10:33 | call to full_name |
5+
| zip_slip.rb:20:50:20:56 | tarfile : | zip_slip.rb:21:7:21:13 | tarfile : |
6+
| zip_slip.rb:21:7:21:13 | tarfile : | zip_slip.rb:21:30:21:34 | entry : |
7+
| zip_slip.rb:21:30:21:34 | entry : | zip_slip.rb:22:21:22:35 | call to full_name |
8+
| zip_slip.rb:46:5:46:24 | call to open : | zip_slip.rb:46:35:46:39 | entry : |
9+
| zip_slip.rb:46:35:46:39 | entry : | zip_slip.rb:47:17:47:26 | call to name |
10+
| zip_slip.rb:56:30:56:37 | zip_file : | zip_slip.rb:57:7:57:14 | zip_file : |
11+
| zip_slip.rb:57:7:57:14 | zip_file : | zip_slip.rb:57:25:57:29 | entry : |
12+
| zip_slip.rb:57:25:57:29 | entry : | zip_slip.rb:58:19:58:28 | call to name |
13+
| zip_slip.rb:90:12:90:54 | call to open : | zip_slip.rb:91:11:91:14 | gzip : |
14+
| zip_slip.rb:91:11:91:14 | gzip : | zip_slip.rb:97:42:97:56 | compressed_file : |
15+
| zip_slip.rb:97:42:97:56 | compressed_file : | zip_slip.rb:98:7:98:21 | compressed_file : |
16+
| zip_slip.rb:98:7:98:21 | compressed_file : | zip_slip.rb:98:32:98:36 | entry : |
17+
| zip_slip.rb:98:32:98:36 | entry : | zip_slip.rb:100:21:100:30 | entry_path |
18+
| zip_slip.rb:123:12:123:34 | call to new : | zip_slip.rb:124:7:124:8 | gz : |
19+
| zip_slip.rb:124:7:124:8 | gz : | zip_slip.rb:124:19:124:23 | entry : |
20+
| zip_slip.rb:124:19:124:23 | entry : | zip_slip.rb:126:21:126:30 | entry_path |
21+
nodes
22+
| zip_slip.rb:8:15:8:54 | call to new : | semmle.label | call to new : |
23+
| zip_slip.rb:9:5:9:11 | tarfile : | semmle.label | tarfile : |
24+
| zip_slip.rb:9:22:9:26 | entry : | semmle.label | entry : |
25+
| zip_slip.rb:10:19:10:33 | call to full_name | semmle.label | call to full_name |
26+
| zip_slip.rb:20:50:20:56 | tarfile : | semmle.label | tarfile : |
27+
| zip_slip.rb:21:7:21:13 | tarfile : | semmle.label | tarfile : |
28+
| zip_slip.rb:21:30:21:34 | entry : | semmle.label | entry : |
29+
| zip_slip.rb:22:21:22:35 | call to full_name | semmle.label | call to full_name |
30+
| zip_slip.rb:46:5:46:24 | call to open : | semmle.label | call to open : |
31+
| zip_slip.rb:46:35:46:39 | entry : | semmle.label | entry : |
32+
| zip_slip.rb:47:17:47:26 | call to name | semmle.label | call to name |
33+
| zip_slip.rb:56:30:56:37 | zip_file : | semmle.label | zip_file : |
34+
| zip_slip.rb:57:7:57:14 | zip_file : | semmle.label | zip_file : |
35+
| zip_slip.rb:57:25:57:29 | entry : | semmle.label | entry : |
36+
| zip_slip.rb:58:19:58:28 | call to name | semmle.label | call to name |
37+
| zip_slip.rb:90:12:90:54 | call to open : | semmle.label | call to open : |
38+
| zip_slip.rb:91:11:91:14 | gzip : | semmle.label | gzip : |
39+
| zip_slip.rb:97:42:97:56 | compressed_file : | semmle.label | compressed_file : |
40+
| zip_slip.rb:98:7:98:21 | compressed_file : | semmle.label | compressed_file : |
41+
| zip_slip.rb:98:32:98:36 | entry : | semmle.label | entry : |
42+
| zip_slip.rb:100:21:100:30 | entry_path | semmle.label | entry_path |
43+
| zip_slip.rb:123:12:123:34 | call to new : | semmle.label | call to new : |
44+
| zip_slip.rb:124:7:124:8 | gz : | semmle.label | gz : |
45+
| zip_slip.rb:124:19:124:23 | entry : | semmle.label | entry : |
46+
| zip_slip.rb:126:21:126:30 | entry_path | semmle.label | entry_path |
47+
subpaths
48+
#select
49+
| zip_slip.rb:10:19:10:33 | call to full_name | zip_slip.rb:8:15:8:54 | call to new : | zip_slip.rb:10:19:10:33 | call to full_name | This file extraction depends on a $@. | zip_slip.rb:8:15:8:54 | call to new | potentially untrusted source |
50+
| zip_slip.rb:22:21:22:35 | call to full_name | zip_slip.rb:20:50:20:56 | tarfile : | zip_slip.rb:22:21:22:35 | call to full_name | This file extraction depends on a $@. | zip_slip.rb:20:50:20:56 | tarfile | potentially untrusted source |
51+
| zip_slip.rb:47:17:47:26 | call to name | zip_slip.rb:46:5:46:24 | call to open : | zip_slip.rb:47:17:47:26 | call to name | This file extraction depends on a $@. | zip_slip.rb:46:5:46:24 | call to open | potentially untrusted source |
52+
| zip_slip.rb:58:19:58:28 | call to name | zip_slip.rb:56:30:56:37 | zip_file : | zip_slip.rb:58:19:58:28 | call to name | This file extraction depends on a $@. | zip_slip.rb:56:30:56:37 | zip_file | potentially untrusted source |
53+
| zip_slip.rb:100:21:100:30 | entry_path | zip_slip.rb:90:12:90:54 | call to open : | zip_slip.rb:100:21:100:30 | entry_path | This file extraction depends on a $@. | zip_slip.rb:90:12:90:54 | call to open | potentially untrusted source |
54+
| zip_slip.rb:126:21:126:30 | entry_path | zip_slip.rb:123:12:123:34 | call to new : | zip_slip.rb:126:21:126:30 | entry_path | This file extraction depends on a $@. | zip_slip.rb:123:12:123:34 | call to new | potentially untrusted source |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
experimental/cwe-022-zipslip/ZipSlip.ql

0 commit comments

Comments
 (0)